This report describes the results of a preregistered study available at: https://osf.io/wjxa4. The goal was to validate a new proposed scale, the Needs Orientation Scale.
# Packages ----
library(rempsyc)
library(lavaanExtra)
library(lavaan)
library(dplyr)
library(purrr)
library(parameters)
library(see)
library(performance)
library(correlation)
library(flextable)
library(sjlabelled)
library(datawizard)
library(report)
library(visdat)
library(bestNormalize)
library(tidyr)
library(psych)
library(nFactors)

# Read data ----
# The raw LimeSurvey export was processed once and cached as an .rds file;
# the two commented lines below document how the cache was produced.
# source("data/survey_448961_R_syntax_file_numeric.R")
# saveRDS(data, "data/raw_data_processed.rds")
data <- readRDS("data/raw_data_processed.rds")
# Describe the full sample before any exclusions (threshold = 1 reports
# every gender category, however small).
# NOTE(review): the reported age range starts at 3, which looks implausible
# for this sample -- verify the age variable before drawing conclusions.
report_participants(data, threshold = 1) %>% cat1207 participants (Mean age = 43.6, SD = 13.4, range: [3, 85], 1.5% missing; Gender: 59.3% women, 37.6% men, 1.57% non-binary, 1.49% missing)
In this section, we are preparing the data for analysis by taking care of preliminary exclusions and checking for and exploring missing values.
Here, we report on participant exclusions and corresponding changes on the sample.
# Check names/row numbers
# names(data)
# Convert raw questionnaire item names to descriptive ones, e.g.
# "NR_D1" -> "needs_rel_deficit1", "NCb_G9" -> "needs_comp_growth9".
#
# Letter codes: N = "needs_", R = "rel" (relatedness), A = "aut" (autonomy),
# C = "comp" (competence), D = "deficit", G = "growth". Lowercase "a"/"b"
# form markers are stripped first so they do not pollute the codes.
#
# @param x Character vector of raw item names.
# @return Character vector of the same length with descriptive names.
#
# Rewritten without the magrittr `%>%`/`.` chain: a plain sequence of
# base-R gsub() calls has no package dependency and reads top-to-bottom.
# Substitution order matters: the a/b strip must come first (replacements
# such as "aut" introduce new lowercase "a"s that must be kept).
rename_needs <- function(x) {
  out <- gsub("a|b", "", x)
  out <- gsub("N", "needs_", out)
  out <- gsub("R", "rel", out)
  out <- gsub("A", "aut", out)
  out <- gsub("C", "comp", out)
  out <- gsub("D", "deficit", out)
  gsub("G", "growth", out)
}
# Names of the needs items to be renamed (raw export names).
needs.names <- data %>%
  select(NR_D1:NCb_G9) %>%
  names()

# Apply the descriptive rename to every needs item, then normalise the
# reverse-scored items so the "r" marker follows the item number
# (e.g. "BF_rE1" -> "BF_E1r"), matching the naming of the other items.
data <- data %>%
  rename_with(rename_needs, all_of(needs.names)) %>%
  rename(
    BF_E1r = "BF_rE1",
    BF_C3r = "BF_rC3",
    BF_N4r = "BF_rN4",
    BF_O5r = "BF_rO5",
    BF_A7r = "BF_rA7",
    SAc_ACC6r = "SAc_rACC6",
    SAc_ACC8r = "SAc_rACC8",
    SAc_TR13r = "SAc_rTR13",
    SAc_ACC14r = "SAc_rACC14",
    PEQ_Au3r = "PEQ_rAu3",
    PEQ_Au5r = "PEQ_rAu5",
    PEQ_No6r = "PEQ_rNo6",
    PEQ_Au7r = "PEQ_rAu7",
    PEQ_Au9r = "PEQ_rAu9"
  )
# Sanity check: confirm the SAc_* reverse-scored renames took effect
# (the "r" now trails the item number, e.g. SAc_ACC6r).
data %>%
select(starts_with("SAc_")) %>%
names## [1] "SAc_EAE1" "SAc_AuDi2" "SAc_TR3" "SAc_EAE4" "SAc_AuDi5"
## [6] "SAc_ACC6r" "SAc_DUn7" "SAc_ACC8r" "SAc_AuDi9" "SAc_AuDi10"
## [11] "SAc_AuDi11" "SAc_Dun12" "SAc_TR13r" "SAc_ACC14r" "SAc_TR15"
# Get data labels: build a one-row data frame whose columns carry the
# survey question text (variable.labels attribute from the import syntax).
# The t() trick turns the label vector into one row so each label can be
# addressed by variable name, e.g. dataset.labels$Sco.
dataset.labels <- data.frame(vars = attributes(data)$variable.labels) |>
t() |>
as.data.frame()
# NOTE(review): 244 is presumably the number of labelled survey columns in
# the raw export -- confirm this still matches if the export changes.
names(dataset.labels) <- names(data[1:244])
# Pull out the text inside the first pair of square brackets of each label
# (question stems wrap the item wording in brackets). Strings without
# brackets are returned unchanged, since the pattern then never matches.
extract_labels <- function(x) {
  gsub(".*?\\[(.*?)\\].*", "\\1", x)
}
# Strip every label down to its bracketed item wording, column by column.
dataset.labels <- dataset.labels %>%
lapply(extract_labels) %>%
as.data.frame()
# Test it
dataset.labels$Sco## [1] "What is the highest level of education you have completed or the highest degree you have obtained?"
## [1] "Asian"
## [1] "they allow me to learn about myself."
# Which participants entered a Code of unexpected length?
# (Valid codes appear to be 12-16 characters, per the filter below;
# "!= 11" in the original note presumably meant "not the expected
# format" -- TODO confirm.)
data %>%
mutate(Code = trimws_toupper(Code)) %>%
filter(nchar(Code) < 12 | nchar(Code) > 16) %>%
select(id, Code) %>%
View()
# 14 people with weird codes.
# Let's exclude "REMITEST" at least since that was me (a test response).
data <- data %>%
mutate(Code = trimws_toupper(Code)) %>%
#filter(nchar(Code) >= 12 & nchar(Code) <= 16)
filter(Code != "REMITEST")
# This also removes 17 rows with no ID or responses at all
# Caro: should we exclude those with wrong participant IDs?
# Check for duplicate Respondent IDs
sum(duplicated(data$Code))## [1] 1
# There is 1 duplicated Respondent ID
# Let's investigate it
data %>%
rempsyc::extract_duplicates("Code") %>%
View()
# Keep the "best" duplicate (rempsyc heuristic: fewest missing values)
data <- data %>%
best_duplicate("Code")## (1 duplicates removed)
Let’s also exclude those who failed 2 or more attention checks (i.e., keep those with a score of two or more).
# Get the names of the attention check items
ATT <- data %>%
select(contains("ATT")) %>%
names()
ATT[1] “N1_ATT1” “PGD_ATT2” “PEQ_ATT3”
| N1_ATT1 | PGD_ATT2 | PEQ_ATT3 |
|---|---|---|
| For this question, select the answer “Not true at all” | For this question, select the answer “Strongly agree” | For this question, select the answer: “Moderately Agree” |
| N1_ATT1 | PGD_ATT2 | PEQ_ATT3 |
|---|---|---|
| 1 | 7 | 4 |
# Let's create a new column to see if they got it right
# (expected correct answers per the table above: ATT1 = 1, ATT2 = 7, ATT3 = 4)
data <- data %>%
mutate(attention1 = .[[ATT[1]]] == 1,
attention2 = .[[ATT[2]]] == 7,
attention3 = .[[ATT[3]]] == 4)
# How many people didn't give the correct attention check 1?
nrow(data) - sum(data$attention1, na.rm = TRUE) [1] 190
# 190 people (matches the printed output above)
# How many people didn't give the correct attention check 2?
nrow(data) - sum(data$attention2, na.rm = TRUE) [1] 215
# 215 people (matches the printed output above)
# How many people didn't give the correct attention check 3?
nrow(data) - sum(data$attention3, na.rm = TRUE) [1] 250
# 250 people (matches the printed output above)
# Calculate sum of attention scores (0-3 correct checks per participant;
# na.rm = TRUE means a missing response counts as a failed check)
data <- data %>%
mutate(attention.total = rowSums(select(
., attention1, attention2, attention3),
na.rm = TRUE))
# Check the distribution of scores.
# Per the table below, 154 + 54 = 208 participants passed fewer than 2
# checks and fall under the preregistered exclusion rule.
data %>%
count(attention.total)| attention.total | n |
|---|---|
| 0 | 154 |
| 1 | 54 |
| 2 | 85 |
| 3 | 895 |
## [1] 1
# There's 1 duplicate IP addresses
# Let's investigate them
# Inspect the rows sharing a duplicated IP address.
data %>%
rempsyc::extract_duplicates("ipaddr") %>%
View()
# Remove duplicated IP addresses, keeping the "best" duplicate
# (rempsyc heuristic, as done above for duplicated Codes)
data <- data %>%
best_duplicate("ipaddr")## (1 duplicates removed)
# Calculate sum of missing values per row, over the questionnaire item
# range (needs_rel_deficit1 through WB_4), plus the proportion missing.
data <- data %>%
mutate(sum.NA = rowSums(is.na(select(
., needs_rel_deficit1:WB_4))),
NA.percent = round(sum.NA / ncol(select(
., needs_rel_deficit1:WB_4)), 2))
# Count how many missing values each participant has
data %>%
count(sum.NA, NA.percent)| sum.NA | NA.percent | n |
|---|---|---|
| 0 | 0.00 | 972 |
| 13 | 0.06 | 1 |
| 24 | 0.11 | 2 |
| 44 | 0.20 | 2 |
| 59 | 0.26 | 2 |
# Check the filter for excluding those with more than 50% missing values.
# The result below is empty (max NA.percent observed is 0.26), so this
# rule excludes no one.
data %>%
filter(NA.percent > 0.50) %>%
select(Code, sum.NA, NA.percent)| Code | sum.NA | NA.percent |
|---|
Here, we report various sample demographics.
979 participants (Mean age = 43.8, SD = 13.7, range: [20, 85]; Gender: 61.2% women, 37.0% men, 1.84% non-binary)
## [1] "What is the highest level of education you have completed or the highest degree you have obtained?"
| Sco | n |
|---|---|
| Bachelor’s degree and/or University certificate | 439 |
| Graduate degree | 210 |
| High school degree | 207 |
| Vocational degree | 119 |
| No high school degree | 4 |
## [1] "Other"
# Recode the six yes/no race checkboxes into a single Race column:
# each Ra_* becomes its label when endorsed, NA otherwise; unite() with
# na.rm drops the NAs and comma-joins any remaining labels. Rows with
# more than one endorsement (a comma in the result) are collapsed to
# "Other", and rows with no endorsement (empty string) also become "Other".
data <- data %>%
mutate(Ra_1 = ifelse(Ra_1 == "Yes", "White", NA),
Ra_2 = ifelse(Ra_2 == "Yes", "Black", NA),
Ra_3 = ifelse(Ra_3 == "Yes", "Native", NA),
Ra_4 = ifelse(Ra_4 == "Yes", "Asian", NA),
Ra_5 = ifelse(Ra_5 == "Yes", "Hawaiian", NA),
Ra_6 = ifelse(Ra_6 == "Yes", "Other", NA)) %>%
unite("Race", Ra_1:Ra_6, na.rm = TRUE, sep = ", ") %>%
mutate(Race = ifelse(grepl(",", .data$Race), "Other", Race),
Race = ifelse(Race == "", "Other", Race))
data %>%
count(Race, sort = TRUE)| Race | n |
|---|---|
| White | 743 |
| Black | 108 |
| Other | 70 |
| Asian | 49 |
| Native | 6 |
| Hawaiian | 3 |
Here, we explore missing data, first by item and questionnaire, then
using the visdat package, and finally using Little’s MCAR
test.
# Summarise missingness per questionnaire with rempsyc::nice_na.
# NOTE(review): the first three prefixes ("NR_", "NA_", "NC_") no longer
# exist -- those items were renamed to "needs_*" earlier -- which is why
# the first three rows of the output below are NA:NA with NaN percentages.
# They should be replaced with the renamed prefix (e.g. "needs_").
nice_na(data, scales = c(
"NR_", "NA_", "NC_", "GMS_", "N1_", "N2_", "MS_", "P_", "BF_",
"Tem_", "MPO_", "Pers_", "PGD_", "SAc_", "GiL_", "GMI_", "Gr_",
"PEQ_", "PGI_", "WB_"))| var | items | na | cells | na_percent | na_max | na_max_percent | all_na |
|---|---|---|---|---|---|---|---|
| NA:NA | 0 | 0 | 0 | NaN | 0 | NaN | 979 |
| NA:NA | 0 | 0 | 0 | NaN | 0 | NaN | 979 |
| NA:NA | 0 | 0 | 0 | NaN | 0 | NaN | 979 |
| GMS_ID1:GMS_INT18 | 18 | 0 | 17622 | 0.00 | 0 | 0.00 | 0 |
| N1_SA1:N1_ATT1 | 13 | 0 | 12727 | 0.00 | 0 | 0.00 | 0 |
| N2_SA13:N2_FC24 | 12 | 0 | 11748 | 0.00 | 0 | 0.00 | 0 |
| MS_AF1:MS_AF3 | 3 | 0 | 2937 | 0.00 | 0 | 0.00 | 0 |
| P_H1:P_P5 | 17 | 0 | 16643 | 0.00 | 0 | 0.00 | 0 |
| BF_E1r:BF_O10 | 10 | 0 | 9790 | 0.00 | 0 | 0.00 | 0 |
| Tem_Avo1:Tem_Avo12 | 12 | 0 | 11748 | 0.00 | 0 | 0.00 | 0 |
| MPO_MAp1:MPO_PAv12 | 12 | 0 | 11748 | 0.00 | 0 | 0.00 | 0 |
| Pers_F1:Pers_R8 | 8 | 0 | 7832 | 0.00 | 0 | 0.00 | 0 |
| PGD_A1:PGD_ATT2 | 11 | 0 | 10769 | 0.00 | 0 | 0.00 | 0 |
| SAc_EAE1:SAc_TR15 | 15 | 30 | 14685 | 0.20 | 15 | 100.00 | 2 |
| GiL_1:GiL_6 | 6 | 24 | 5874 | 0.41 | 6 | 100.00 | 4 |
| GMI_Ref1:GMI_Ref8 | 8 | 32 | 7832 | 0.41 | 8 | 100.00 | 4 |
| Gr_1:Gr_6 | 6 | 24 | 5874 | 0.41 | 6 | 100.00 | 4 |
| PEQ_Au1:PEQ_ATT3 | 11 | 66 | 10769 | 0.61 | 11 | 100.00 | 6 |
| PGI_1:PGI_9 | 9 | 63 | 8811 | 0.72 | 9 | 100.00 | 7 |
| WB_1:WB_4 | 4 | 28 | 3916 | 0.72 | 4 | 100.00 | 7 |
| Total | 291 | 27707 | 284889 | 9.73 | 96 | 32.99 | 0 |
A few items are missing here and there.
In this section, we perform the Exploratory Factor Analysis.
Andy Field (2012) notes:
It’s a good idea to have a look at the correlation matrix, for the reasons we discussed earlier [If we measure several variables, or ask someone several questions about themselves, the correlation between each pair of variables (or questions) can be arranged in what’s known as an R-matrix. An R-matrix is just a correlation matrix: a table of correlation coefficients between variables… The diagonal elements of an R-matrix are all ones because each variable will correlate perfectly with itself… By reducing a data set from a group of interrelated variables into a smaller set of factors, factor analysis achieves parsimony by explaining the maximum amount of common variance in a correlation matrix using the smallest number of explanatory constructs… In factor analysis we strive to reduce this R-matrix down into its underlying dimensions by looking at which variables seem to cluster together in a meaningful way. This data reduction is achieved by looking for variables that correlate highly with a group of other variables, but do not correlate with variables outside of that group].
Let’s look at the correlation matrix then.
But first, let’s select half our sample at random as specified in the prereg.
# Build the EFA subsample: drop the "needs_" prefix for shorter item names,
# keep only the 50 needs items, and take the preregistered random half.
# NOTE(review): row_indices is defined in a chunk not shown here --
# presumably a seeded random half-split of the rows; confirm the seed is set
# so the split is reproducible.
data_EFA <- data %>%
rename_with(~gsub("needs_", "", .)) %>%
select(rel_deficit1:comp_growth9) %>%
slice(row_indices)
# Visualise the 50x50 correlation matrix (too large to read as a plot;
# exported to Excel instead, see below).
x <- data_EFA %>%
correlation(p_adjust = "none") %>%
summary() %>%
plot()
plotly::ggplotly(x)This is not really working, as the matrix is too large (too many items). Let’s look at it in Excel instead.
##
##
## [Correlation matrix 'items_matrix.xlsx' has been saved to working directory (or where specified).]
## Warning in xl_open.default(paste0(filename, ".xlsx")): will not open file when
## not interactive
## NULL
Field continues:
You should be comfortable with the idea that to do a factor analysis we need to have variables that correlate fairly well, but not perfectly. Also, any variables that correlate with no others should be eliminated. Therefore, we can use this correlation matrix to check the pattern of relationships. First, 1) scan the matrix for correlations greater than .3, then 2) look for variables that only have a small number of correlations greater than this value. Then 3) scan the correlation coefficients themselves and look for any greater than .9. If any are found then you should be aware that a problem could arise because of multicollinearity in the data.”
Looking at the r matrix, we can see:
## [1] "it's something that has affected me negatively in the past and I don't want to relive it."
So far, so good.
As well as looking at the correlation matrix, we should run Bartlett’s test and the KMO on the correlation matrix.
Let’s run the Bartlett’s test:
## R was not square, finding R from data
## $chisq
## [1] 16643.96
##
## $p.value
## [1] 0
##
## $df
## [1] 1225
For factor analysis to work we need some relationships between variables and if the R-matrix were an identity matrix then all correlation coefficients would be zero. Therefore, we want this test to be significant (i.e., have a significance value less than .05). A significant test tells us that the R-matrix is not an identity matrix; therefore, there are some relationships between the variables we hope to include in the analysis.
The test is significant, thus factor analysis is appropriate.
Let’s run the Kaiser–Meyer–Olkin test:
## [1] 0.95
## comp_growth9 comp_growth5 comp_growth8 aut_growth7 aut_growth3
## 0.97 0.97 0.97 0.97 0.96
## comp_growth4 aut_growth8 aut_growth2 comp_deficit5 aut_growth1
## 0.96 0.96 0.96 0.96 0.96
## comp_growth1 comp_growth2 comp_deficit6 comp_growth6 aut_deficit4
## 0.96 0.96 0.96 0.96 0.96
## comp_deficit8 comp_growth3 aut_deficit10 rel_growth3 aut_deficit8
## 0.96 0.96 0.96 0.96 0.96
## aut_growth4 aut_deficit6 rel_growth4 comp_deficit3 comp_growth7
## 0.95 0.95 0.95 0.95 0.95
## aut_deficit7 aut_deficit9 rel_growth5 comp_deficit9 aut_growth5
## 0.95 0.95 0.94 0.94 0.94
## rel_growth6 rel_deficit7 aut_deficit1 comp_deficit2 rel_growth1
## 0.94 0.94 0.94 0.94 0.94
## rel_growth2 comp_deficit7 aut_growth6 comp_deficit1 aut_deficit3
## 0.94 0.93 0.93 0.93 0.93
## rel_deficit1 comp_deficit4 rel_growth7 rel_deficit2 aut_deficit5
## 0.93 0.93 0.92 0.92 0.92
## rel_deficit5 rel_deficit4 aut_deficit2 rel_deficit3 rel_deficit6
## 0.91 0.90 0.89 0.88 0.79
No item has a KMO value smaller than .5.
Multicollinearity can be detected by looking at the determinant of the R-matrix.
# Determinant of the correlation matrix (multicollinearity check).
# Field's heuristic: a determinant below 1e-5 signals multicollinearity.
# r_matrix is reused further down (e.g. by principal()), so keep the name.
r_matrix <- cor(data_EFA, use = "pairwise.complete.obs")
det(r_matrix)## [1] 0.0000000000000004333248
One simple heuristic is that the determinant of the R-matrix should be greater than 0.00001.” (i.e., 1e-5)
Our determinant is smaller than 0.00001 (it is 0.0000000000000004333248). As such, our determinant does seem problematic.
After checking the determinant, you can, if necessary, eliminate variables that you think are causing the problem. … If you have reason to believe that the correlation matrix has multicollinearity then you could look through the correlation matrix for variables that correlate very highly (R > .8) and consider eliminating one of the variables (or more, depending on the extent of the problem) before proceeding. You may have to try some trial and error to work out which variables are creating the problem (it’s not always the two with the highest correlation, it could be a larger number of variables with correlations that are not obviously too large).
Normally, we should try dropping some items to see if the determinant of the R-matrix improves. But we will skip this step for now.
For our present purposes we will use principal components analysis, which strictly speaking isn’t factor analysis; however, the two procedures may often yield similar results… I mentioned earlier that when conducting principal components analysis we begin by establishing the linear variates within the data and then decide how many of these variates to retain (or ‘extract’). Therefore, our starting point is to create a principal components model that has the same number of factors as there are variables in the data: by doing this we are just reducing the data set down to its underlying factors. By extracting as many factors as there are variables we can inspect their eigenvalues and make decisions about which factors to extract. (Note that if you use factor analysis, rather than principal components analysis, you need to extract fewer factors than you have variables – so if you have 23 variables, extracting 18, or so, factors should be OK.)
## Principal Components Analysis
## Call: principal(r = r_matrix, nfactors = 50, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11
## rel_deficit1 0.36 0.63 0.22 -0.24
## rel_growth1 0.43 0.59 -0.23 -0.23
## rel_deficit2 0.33 0.59 0.32 0.25 -0.25
## rel_deficit3 0.28 0.57 0.38 0.21 0.30 -0.22
## rel_growth2 0.41 0.67
## rel_growth3 0.43 0.55
## rel_deficit4 0.30 0.49 0.36 0.24 0.28
## rel_growth4 0.44 0.60
## rel_deficit5 0.37 0.61 0.29 -0.23
## rel_growth5 0.39 0.65 0.21
## rel_growth6 0.41 0.65
## rel_growth7 0.28 0.63 -0.29
## rel_deficit6 0.43 0.52 0.39 0.22 0.31
## rel_deficit7 0.40 0.62 0.20 0.26
## aut_growth1 0.63 -0.26 -0.24 0.33
## aut_deficit1 0.50 -0.35 0.22 -0.26 0.26
## aut_growth2 0.66 -0.23 -0.27
## aut_deficit2 0.30 0.63 -0.23
## aut_deficit3 0.57 -0.36 0.35 -0.25 -0.26
## aut_growth3 0.70 -0.24 0.25
## aut_growth4 0.62 -0.27 0.31 0.21
## aut_growth5 0.62 -0.30 0.29 0.33 -0.23
## aut_deficit4 0.54 -0.34 0.38 -0.28
## aut_deficit5 0.44 -0.22 0.56 -0.20
## aut_deficit6 0.49 -0.30 0.49
## aut_deficit7 0.58 -0.29 0.25 -0.38 -0.21
## aut_deficit8 0.55 0.33 0.32 0.27
## aut_growth6 0.57 -0.23 -0.36 -0.27
## aut_growth7 0.70 -0.31
## aut_growth8 0.59 -0.25 -0.28 -0.24
## aut_deficit9 0.54 -0.34 0.47
## aut_deficit10 0.66 -0.20 0.35 -0.21 -0.26
## comp_growth1 0.72 -0.28 -0.21
## comp_deficit1 0.57 0.57
## comp_deficit2 0.60 -0.21 0.47 0.23
## comp_growth2 0.72 -0.35 -0.22
## comp_deficit3 0.69 0.24 0.21 0.22 0.22
## comp_growth3 0.72 -0.26 -0.33
## comp_deficit4 0.51 0.32 0.37 -0.22 -0.34
## comp_growth4 0.69 -0.34
## comp_deficit5 0.68 0.31 -0.26 0.23
## comp_deficit6 0.64 0.36 -0.21 -0.24
## comp_growth5 0.75 -0.35 -0.27
## comp_growth6 0.72 -0.34 -0.24
## comp_deficit7 0.60 0.54 0.20
## comp_deficit8 0.64 -0.21 0.35
## comp_growth7 0.74 -0.35
## comp_deficit9 0.69 0.27 0.22 0.33
## comp_growth8 0.69 -0.30
## comp_growth9 0.72 -0.34
## PC12 PC13 PC14 PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22
## rel_deficit1 -0.28
## rel_growth1 -0.20
## rel_deficit2
## rel_deficit3
## rel_growth2
## rel_growth3 -0.21 -0.31
## rel_deficit4 0.23 -0.31
## rel_growth4 0.25
## rel_deficit5
## rel_growth5
## rel_growth6 -0.21 -0.25
## rel_growth7 0.35
## rel_deficit6
## rel_deficit7
## aut_growth1 0.24
## aut_deficit1 -0.24 0.24 0.24
## aut_growth2
## aut_deficit2 0.20
## aut_deficit3
## aut_growth3 0.23
## aut_growth4
## aut_growth5
## aut_deficit4 -0.23
## aut_deficit5 0.22
## aut_deficit6 0.29 0.24
## aut_deficit7 0.24
## aut_deficit8 0.21 0.23
## aut_growth6 -0.22
## aut_growth7 -0.23
## aut_growth8 0.24 0.22
## aut_deficit9
## aut_deficit10
## comp_growth1
## comp_deficit1
## comp_deficit2 -0.20
## comp_growth2
## comp_deficit3
## comp_growth3
## comp_deficit4
## comp_growth4 0.35
## comp_deficit5
## comp_deficit6 0.22
## comp_growth5
## comp_growth6
## comp_deficit7
## comp_deficit8 0.21 0.21
## comp_growth7
## comp_deficit9 -0.22
## comp_growth8 -0.21
## comp_growth9
## PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30 PC31 PC32 PC33
## rel_deficit1
## rel_growth1
## rel_deficit2
## rel_deficit3
## rel_growth2
## rel_growth3 0.23
## rel_deficit4
## rel_growth4 0.30
## rel_deficit5
## rel_growth5 -0.24
## rel_growth6 -0.24
## rel_growth7 0.24
## rel_deficit6
## rel_deficit7
## aut_growth1
## aut_deficit1
## aut_growth2 0.21
## aut_deficit2
## aut_deficit3
## aut_growth3
## aut_growth4
## aut_growth5
## aut_deficit4 -0.24
## aut_deficit5
## aut_deficit6
## aut_deficit7
## aut_deficit8 -0.24
## aut_growth6 0.21
## aut_growth7
## aut_growth8 -0.20
## aut_deficit9 -0.26
## aut_deficit10
## comp_growth1 0.27
## comp_deficit1
## comp_deficit2
## comp_growth2
## comp_deficit3
## comp_growth3
## comp_deficit4 -0.29
## comp_growth4
## comp_deficit5 0.23
## comp_deficit6 -0.23 0.20
## comp_growth5
## comp_growth6
## comp_deficit7
## comp_deficit8 0.22 0.23
## comp_growth7
## comp_deficit9
## comp_growth8 -0.21
## comp_growth9 -0.22
## PC34 PC35 PC36 PC37 PC38 PC39 PC40 PC41 PC42 PC43 PC44 PC45 PC46
## rel_deficit1
## rel_growth1
## rel_deficit2
## rel_deficit3
## rel_growth2
## rel_growth3
## rel_deficit4
## rel_growth4
## rel_deficit5
## rel_growth5
## rel_growth6
## rel_growth7
## rel_deficit6
## rel_deficit7
## aut_growth1
## aut_deficit1
## aut_growth2
## aut_deficit2
## aut_deficit3
## aut_growth3
## aut_growth4
## aut_growth5
## aut_deficit4
## aut_deficit5
## aut_deficit6
## aut_deficit7
## aut_deficit8
## aut_growth6
## aut_growth7
## aut_growth8
## aut_deficit9
## aut_deficit10
## comp_growth1
## comp_deficit1
## comp_deficit2
## comp_growth2
## comp_deficit3
## comp_growth3
## comp_deficit4
## comp_growth4
## comp_deficit5
## comp_deficit6
## comp_growth5
## comp_growth6
## comp_deficit7
## comp_deficit8
## comp_growth7
## comp_deficit9
## comp_growth8
## comp_growth9 0.23
## PC47 PC48 PC49 PC50 h2 u2 com
## rel_deficit1 1 0.00000000000000022 5.4
## rel_growth1 1 0.00000000000000278 6.1
## rel_deficit2 1 -0.00000000000000444 6.4
## rel_deficit3 1 0.00000000000000067 6.6
## rel_growth2 1 0.00000000000000000 4.3
## rel_growth3 1 0.00000000000000044 6.9
## rel_deficit4 1 -0.00000000000000155 9.3
## rel_growth4 1 -0.00000000000000178 5.5
## rel_deficit5 1 -0.00000000000000089 5.8
## rel_growth5 1 0.00000000000000111 4.8
## rel_growth6 1 0.00000000000000000 4.6
## rel_growth7 1 -0.00000000000000044 5.2
## rel_deficit6 1 -0.00000000000000089 6.6
## rel_deficit7 1 -0.00000000000000022 5.5
## aut_growth1 1 0.00000000000000167 5.4
## aut_deficit1 1 0.00000000000000078 9.4
## aut_growth2 1 0.00000000000000322 4.8
## aut_deficit2 1 -0.00000000000000111 5.4
## aut_deficit3 1 0.00000000000000089 6.8
## aut_growth3 1 0.00000000000000278 4.0
## aut_growth4 1 0.00000000000000100 5.7
## aut_growth5 1 0.00000000000000022 5.6
## aut_deficit4 1 0.00000000000000144 7.5
## aut_deficit5 1 0.00000000000000056 6.5
## aut_deficit6 1 0.00000000000000178 7.1
## aut_deficit7 1 0.00000000000000144 6.5
## aut_deficit8 1 0.00000000000000133 7.6
## aut_growth6 1 0.00000000000000133 7.1
## aut_growth7 1 0.00000000000000244 3.8
## aut_growth8 1 0.00000000000000155 6.8
## aut_deficit9 1 0.00000000000000167 6.4
## aut_deficit10 1 0.00000000000000178 4.7
## comp_growth1 1 0.00000000000000200 3.5
## comp_deficit1 1 0.00000000000000233 4.6
## comp_deficit2 1 0.00000000000000111 5.4
## comp_growth2 1 0.00000000000000189 3.4
## comp_deficit3 1 0.00000000000000244 4.2
## comp_growth3 1 0.00000000000000122 3.5
## comp_deficit4 1 0.00000000000000100 7.9
## comp_growth4 1 0.00000000000000200 3.9
## comp_deficit5 1 0.00000000000000244 4.2
## comp_deficit6 1 0.00000000000000056 5.1
## comp_growth5 1 0.00000000000000178 3.0
## comp_growth6 1 0.00000000000000233 3.4
## comp_deficit7 1 0.00000000000000122 4.5
## comp_deficit8 1 0.00000000000000078 5.2
## comp_growth7 1 0.00000000000000078 3.2
## comp_deficit9 1 0.00000000000000111 4.0
## comp_growth8 1 0.00000000000000333 4.0
## comp_growth9 1 0.00000000000000144 3.4
##
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11
## SS loadings 16.03 6.04 3.86 2.71 1.46 1.12 1.05 0.94 0.86 0.78 0.73
## Proportion Var 0.32 0.12 0.08 0.05 0.03 0.02 0.02 0.02 0.02 0.02 0.01
## Cumulative Var 0.32 0.44 0.52 0.57 0.60 0.62 0.65 0.66 0.68 0.70 0.71
## Proportion Explained 0.32 0.12 0.08 0.05 0.03 0.02 0.02 0.02 0.02 0.02 0.01
## Cumulative Proportion 0.32 0.44 0.52 0.57 0.60 0.62 0.65 0.66 0.68 0.70 0.71
## PC12 PC13 PC14 PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22
## SS loadings 0.70 0.68 0.62 0.62 0.57 0.56 0.54 0.51 0.50 0.47 0.47
## Proportion Var 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01
## Cumulative Var 0.73 0.74 0.75 0.76 0.78 0.79 0.80 0.81 0.82 0.83 0.84
## Proportion Explained 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01
## Cumulative Proportion 0.73 0.74 0.75 0.76 0.78 0.79 0.80 0.81 0.82 0.83 0.84
## PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30 PC31 PC32 PC33
## SS loadings 0.46 0.43 0.42 0.41 0.40 0.39 0.37 0.36 0.35 0.34 0.31
## Proportion Var 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01
## Cumulative Var 0.85 0.85 0.86 0.87 0.88 0.89 0.89 0.90 0.91 0.91 0.92
## Proportion Explained 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01
## Cumulative Proportion 0.85 0.85 0.86 0.87 0.88 0.89 0.89 0.90 0.91 0.91 0.92
## PC34 PC35 PC36 PC37 PC38 PC39 PC40 PC41 PC42 PC43 PC44
## SS loadings 0.31 0.30 0.29 0.28 0.27 0.26 0.25 0.24 0.24 0.22 0.21
## Proportion Var 0.01 0.01 0.01 0.01 0.01 0.01 0.00 0.00 0.00 0.00 0.00
## Cumulative Var 0.93 0.93 0.94 0.94 0.95 0.96 0.96 0.96 0.97 0.97 0.98
## Proportion Explained 0.01 0.01 0.01 0.01 0.01 0.01 0.00 0.00 0.00 0.00 0.00
## Cumulative Proportion 0.93 0.93 0.94 0.94 0.95 0.96 0.96 0.96 0.97 0.97 0.98
## PC45 PC46 PC47 PC48 PC49 PC50
## SS loadings 0.21 0.20 0.19 0.18 0.16 0.15
## Proportion Var 0.00 0.00 0.00 0.00 0.00 0.00
## Cumulative Var 0.98 0.99 0.99 0.99 1.00 1.00
## Proportion Explained 0.00 0.00 0.00 0.00 0.00 0.00
## Cumulative Proportion 0.98 0.99 0.99 0.99 1.00 1.00
##
## Mean item complexity = 5.4
## Test of the hypothesis that 50 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0
##
## Fit based upon off diagonal values = 1
On the far right of the factor loading matrix are two columns, labelled h2 and u2. h2 is the communalities (which are sometimes called h2). These communalities are all equal to 1 because we have extracted 50 items, the same as the number of variables: we’ve explained all of the variance in every variable. When we extract fewer factors (or components) we’ll have lower communalities. Next to the communality column is the uniqueness column, labelled u2. This is the amount of unique variance for each variable, and it’s 1 minus the communality; because all of the communalities are 1, all of the uniquenesses are 0…. The next thing to look at after the factor loading matrix is the eigenvalues. The eigenvalues associated with each factor represent the variance explained by that particular linear component. R calls these SS loadings (sums of squared loadings), because they are the sum of the squared loadings. (You can also find them in a variable associated with the model called values, so in our case we could access this variable using pc1$values). R also displays the eigenvalues in terms of the proportion of variance explained… We should also consider the scree plot.
Field recommends two approaches to determining the required number of factors:
# Determine Number of Factors to Extract (scree + parallel analysis)
# NOTE(review): cor() here uses the default use = "everything", while
# r_matrix above used "pairwise.complete.obs" -- confirm data_EFA has no
# missing values in this half-sample, otherwise cor() returns NAs.
ev <- eigen(cor(data_EFA)) # get eigenvalues
ap <- parallel(subject = nrow(data_EFA), var = ncol(data_EFA),
rep = 100, cent = .05)
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
# Below: pc1 is the 50-component principal() model fitted earlier
# (see the "nfactors = 50" output above); its Vaccounted table gives the
# cumulative variance explained by the first 4 components.
plotnScree(nS)pc1$Vaccounted %>%
as.data.frame() %>%
select(PC4) %>%
filter(row.names(.) == "Cumulative Proportion")| PC4 | |
|---|---|
| Cumulative Proportion | 0.5725453 |
We had originally predicted to have 2 factors, growth vs. deficit-reduction oriented (with 6 subdimensions in total, for autonomy, competence, and relatedness).
However, based on the point of inflexion of the scree plot above, we would extract 4 factors (excluding the point of inflexion, as per Field/Thurstone’s recommendations), which together account for 57.25% of the variance in the data.
The scree plot requires a sample size greater than 200 to be considered reliable, and we have 458, so we are confident in these results.
# ev$values # print eigenvalues
# Kaiser's criterion: count eigenvalues greater than 1 (7 here, per output)
HighEigenValues <- ev$values[ev$values > 1]
HighEigenValues## [1] 16.028264 6.037706 3.855612 2.705684 1.464289 1.124544 1.047106
## [1] 7
# Cumulative variance explained by the first 7 components
pc1$Vaccounted %>%
as.data.frame() %>%
select(PC7) %>%
filter(row.names(.) == "Cumulative Proportion")| PC7 | |
|---|---|
| Cumulative Proportion | 0.6452641 |
Based on the number of eigen values greater than 1 (above), we would extract 7 factors, which together account for 64.53% of the variance in the data.
## [1] 16.0282644 6.0377062 3.8556123 2.7056837 1.4642890 1.1245437
## [7] 1.0471059 0.9413763 0.8558648 0.7826413 0.7312287 0.7008446
## [1] 12
# Cumulative variance explained by the first 11 components (Jolliffe's
# criterion). NOTE(review): the eigenvalue count printed above is 12
# (the 12th eigenvalue, 0.7008, is also > 0.7), yet PC11 is inspected
# here -- reconcile the count with the column selected.
pc1$Vaccounted %>%
as.data.frame() %>%
select(PC11) %>%
filter(row.names(.) == "Cumulative Proportion")| PC11 | |
|---|---|
| Cumulative Proportion | 0.7114863 |
Using Jolliffe’s criterion (eigenvalues greater than 0.7), we get 11 factors, explaining 71.15% of the variance [Note: there is no reason to favor this criterion].
However, to be considered reliable, Kaiser’s method requires either: (a) the number of variables to be less than 30 and all resulting communalities (after extraction) to be greater than 0.7, or; (b) the sample size to be greater than 250 and the average communality to be greater than 0.6.
Let’s look at communalities greater than 0.7 as well as the average communality:
# Communalities after extracting 4 factors (varimax-rotated factanal).
# NOTE(review): the name dataForScreePlot is a misnomer -- it holds the
# 4-factor solution used only for communalities here. Communality = 1 -
# uniqueness. (Stray trailing semicolon below is harmless.)
dataForScreePlot <- factanal(data_EFA, factors = 4, rotation = "varimax")
itemCommunalities <- 1 - dataForScreePlot$uniquenesses;
# List items with low communalities ( < .70).
itemsWithLowCommunalities <- itemCommunalities[itemCommunalities < .70]
itemsWithLowCommunalities## rel_deficit1 rel_growth1 rel_deficit2 rel_deficit3 rel_growth2
## 0.4938612 0.5001530 0.5320966 0.5267940 0.5875829
## rel_growth3 rel_deficit4 rel_growth4 rel_deficit5 rel_growth5
## 0.4743004 0.4450193 0.5147191 0.5946466 0.5514007
## rel_growth6 rel_growth7 rel_deficit6 rel_deficit7 aut_growth1
## 0.5612520 0.4373546 0.1541566 0.5513229 0.4774223
## aut_deficit1 aut_growth2 aut_deficit2 aut_deficit3 aut_growth3
## 0.4129901 0.4903851 0.4827126 0.6125896 0.5325746
## aut_growth4 aut_growth5 aut_deficit4 aut_deficit5 aut_deficit6
## 0.4407200 0.4105065 0.5948108 0.5400531 0.5439822
## aut_deficit7 aut_deficit8 aut_growth6 aut_growth7 aut_growth8
## 0.4716790 0.4749031 0.4190472 0.5945950 0.4343792
## aut_deficit9 aut_deficit10 comp_growth1 comp_deficit1 comp_deficit2
## 0.6447023 0.5751834 0.5958134 0.6823140 0.5909774
## comp_growth2 comp_deficit3 comp_growth3 comp_deficit4 comp_growth4
## 0.6510962 0.5145418 0.5897421 0.4500783 0.5743385
## comp_deficit5 comp_deficit6 comp_growth5 comp_growth6 comp_deficit7
## 0.5289531 0.5188286 0.6969402 0.6539638 0.6764120
## comp_deficit8 comp_growth7 comp_deficit9 comp_growth8 comp_growth9
## 0.5723192 0.6588296 0.5511665 0.5564627 0.6337183
## [1] 50
## [1] 0.5354878
Returning to Kaiser’s criteria, we find that:
Together, these verifications suggest our data does not fully comply with either of Kaiser’s criteria, meaning we cannot assume Kaiser’s method to be accurate in this situation.
All in all, Field suggests that when the criteria for Kaiser’s method are not satisfied, it is best advised to use a scree plot, provided the sample size is greater than 200, which it is.
The psych package also offers another method: the Very Simple Structure (Revelle and Rocklin, 1979) which applies a goodness of fit test to determine the optimal number of factors to extract. The graphic output indicates the “optimal” number of factors for different levels of complexity.
##
## Very Simple Structure
## Call: vss(x = data_EFA)
## VSS complexity 1 achieves a maximimum of 0.8 with 2 factors
## VSS complexity 2 achieves a maximimum of 0.91 with 3 factors
##
## The Velicer MAP achieves a minimum of 0.01 with 5 factors
## BIC achieves a minimum of -3673.97 with 5 factors
## Sample Size adjusted BIC achieves a minimum of -885.71 with 8 factors
##
## Statistics by number of factors
## vss1 vss2 map dof chisq
## 1 0.78 0.00 0.0367 1175 8873
## 2 0.80 0.89 0.0238 1126 6184
## 3 0.66 0.91 0.0142 1078 4234
## 4 0.54 0.88 0.0092 1031 2977
## 5 0.48 0.81 0.0080 985 2426
## 6 0.48 0.78 0.0081 940 2147
## 7 0.46 0.76 0.0082 896 1891
## 8 0.48 0.77 0.0085 853 1689
## prob
## 1 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## 2 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## 3 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## 4 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000057
## 5 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010825159065728395573445508315302276969305239617824554443359375000000
## 6 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004537478203853367610381280838893758300400804728269577026367187500000000000000000000000000000000
## 7 0.000000000000000000000000000000000000000000000000000000000000000000000000275332244490943346551872772387525856174761429429054260253906250000000000000000000000000000000000000000000000000000000
## 8 0.000000000000000000000000000000000000000000000000000000001969293158834132156931318036185984965413808822631835937500000000000000000000000000000000000000000000000000000000000000000000000000000
## sqresid fit RMSEA BIC SABIC complex eChisq SRMR eCRMS eBIC
## 1 72.5 0.78 0.116 1597 5326 1.0 23213 0.139 0.142 15937
## 2 36.3 0.89 0.096 -789 2785 1.2 9223 0.088 0.092 2250
## 3 21.6 0.93 0.077 -2441 980 1.4 3937 0.057 0.061 -2738
## 4 14.4 0.96 0.062 -3407 -135 1.6 1585 0.036 0.040 -4799
## 5 12.3 0.96 0.055 -3674 -548 1.8 1076 0.030 0.033 -5023
## 6 11.3 0.97 0.051 -3674 -690 1.9 875 0.027 0.031 -4945
## 7 10.3 0.97 0.048 -3657 -813 2.0 694 0.024 0.028 -4854
## 8 9.5 0.97 0.045 -3593 -886 2.0 572 0.022 0.026 -4710
Now that we know how many components we want to extract, we can rerun the analysis, specifying that number.
## Principal Components Analysis
## Call: principal(r = data_EFA, nfactors = 4, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 h2 u2 com
## rel_deficit1 0.36 0.63 0.54 0.46 1.7
## rel_growth1 0.43 0.59 0.55 0.45 2.0
## rel_deficit2 0.33 0.59 0.32 0.58 0.42 2.3
## rel_deficit3 0.28 0.57 0.38 0.58 0.42 2.5
## rel_growth2 0.41 0.67 0.63 0.37 1.7
## rel_growth3 0.43 0.55 0.53 0.47 2.3
## rel_deficit4 0.30 0.49 0.36 0.24 0.51 0.49 3.1
## rel_growth4 0.44 0.60 0.56 0.44 1.9
## rel_deficit5 0.37 0.61 0.29 0.63 0.37 2.4
## rel_growth5 0.39 0.65 0.60 0.40 1.8
## rel_growth6 0.41 0.65 0.59 0.41 1.7
## rel_growth7 0.28 0.63 0.49 0.51 1.4
## rel_deficit6 0.43 0.23 0.77 1.5
## rel_deficit7 0.40 0.62 0.20 0.59 0.41 2.0
## aut_growth1 0.63 -0.26 -0.24 0.54 0.46 1.7
## aut_deficit1 0.50 -0.35 0.22 0.44 0.56 2.4
## aut_growth2 0.66 -0.23 0.54 0.46 1.5
## aut_deficit2 0.30 0.63 -0.23 0.57 0.43 1.9
## aut_deficit3 0.57 -0.36 0.35 -0.25 0.63 0.37 2.9
## aut_growth3 0.70 -0.24 0.56 0.44 1.3
## aut_growth4 0.62 -0.27 0.50 0.50 1.6
## aut_growth5 0.62 -0.30 0.48 0.52 1.5
## aut_deficit4 0.54 -0.34 0.38 -0.28 0.63 0.37 3.2
## aut_deficit5 0.44 -0.22 0.56 -0.20 0.60 0.40 2.6
## aut_deficit6 0.49 -0.30 0.49 0.59 0.41 2.8
## aut_deficit7 0.58 -0.29 0.25 0.51 0.49 2.0
## aut_deficit8 0.55 0.33 0.32 0.53 0.47 2.5
## aut_growth6 0.57 -0.23 0.45 0.55 1.8
## aut_growth7 0.70 -0.31 0.63 0.37 1.6
## aut_growth8 0.59 -0.25 -0.28 0.49 0.51 1.8
## aut_deficit9 0.54 -0.34 0.47 0.66 0.34 3.0
## aut_deficit10 0.66 -0.20 0.35 0.60 0.40 1.8
## comp_growth1 0.72 -0.28 0.60 0.40 1.3
## comp_deficit1 0.57 0.57 0.69 0.31 2.3
## comp_deficit2 0.60 -0.21 0.47 0.62 0.38 2.2
## comp_growth2 0.72 -0.35 0.65 0.35 1.5
## comp_deficit3 0.69 0.24 0.55 0.45 1.3
## comp_growth3 0.72 -0.26 0.59 0.41 1.3
## comp_deficit4 0.51 0.32 0.37 0.54 0.46 2.9
## comp_growth4 0.69 -0.34 0.62 0.38 1.6
## comp_deficit5 0.68 0.31 0.58 0.42 1.5
## comp_deficit6 0.64 0.36 0.57 0.43 1.8
## comp_growth5 0.75 -0.35 0.69 0.31 1.4
## comp_growth6 0.72 -0.34 0.65 0.35 1.4
## comp_deficit7 0.60 0.54 0.68 0.32 2.2
## comp_deficit8 0.64 -0.21 0.35 0.60 0.40 2.0
## comp_growth7 0.74 -0.35 0.66 0.34 1.4
## comp_deficit9 0.69 0.27 0.59 0.41 1.5
## comp_growth8 0.69 -0.30 0.57 0.43 1.4
## comp_growth9 0.72 -0.34 0.64 0.36 1.4
##
## PC1 PC2 PC3 PC4
## SS loadings 16.03 6.04 3.86 2.71
## Proportion Var 0.32 0.12 0.08 0.05
## Cumulative Var 0.32 0.44 0.52 0.57
## Proportion Explained 0.56 0.21 0.13 0.09
## Cumulative Proportion 0.56 0.77 0.91 1.00
##
## Mean item complexity = 1.9
## Test of the hypothesis that 4 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.04
## with the empirical chi square 1980.07 with prob < 0.00000000000000000000000000000000000000000000000000000000000002
##
## Fit based upon off diagonal values = 0.99
Fit values over 0.95 are often considered indicators of good fit.
Our value is 0.99, which indicates that four factors are sufficient.
“There’s another thing that we can look at to see if we’ve extracted the correct number of factors: this is the reproduced correlation matrix and the difference between the reproduced correlation matrix and the correlation matrix in the data. … The difference between the reproduced and actual correlation matrices is referred to as the residuals.”
# obtain the residuals
# Residuals = observed correlations minus the correlations reproduced from
# the extracted components (psych::factor.residuals). `r_matrix` and `pc2`
# were created in earlier chunks of this report.
residuals <- factor.residuals(r_matrix, pc2$loadings)
# Preview: first column of residuals for the first six items
residuals %>%
as.data.frame() %>%
head() %>%
select(1)| rel_deficit1 | |
|---|---|
| rel_deficit1 | 0.4634101 |
| rel_growth1 | -0.0968977 |
| rel_deficit2 | 0.0324340 |
| rel_deficit3 | -0.0480843 |
| rel_growth2 | -0.0174955 |
| rel_growth3 | -0.0396346 |
For a good model these values will all be small. There are several ways we can define how small we want the residuals to be. One approach is to see how large the residuals are, compared to the original correlations. The very worst the model could be (if we extracted no factors at all) would be the size of the correlations in the original data. Thus one approach is to compare the size of the residuals with the size of the correlations. If the correlations were small to start with, we’d expect very small residuals. If the correlations were large to start with, we wouldn’t mind if the residuals were relatively larger. So one measure of the residuals is to compare the residuals with the original correlations – because residuals are positive and negative, they should be squared before doing that. A measure of the fit of the model is therefore the sum of the squared residuals divided by the sum of the squared correlations.
# create an object that contains the factor residuals
# Keep only the upper triangle: unique item pairs, diagonal excluded
# (the residual matrix is symmetric, so values repeat below the diagonal).
residuals <- as.matrix(residuals[upper.tri(residuals)])
# how many large residuals there are
# Flag residuals exceeding the conventional |0.05| cutoff
large.resid <- abs(residuals) > 0.05
sum(large.resid)## [1] 264
## [1] 0.2155102
There are many other ways of looking at residuals, which we’ll now explore…. A simple approach to residuals is just to say that we want the residuals to be small. In fact, we want most values to be less than 0.05. We can work out how many residuals are large by this criterion fairly easily in R. First, we need to extract the residuals into a new object. We need to do this because at the moment the matrix of residuals is symmetrical (so the residuals are repeated above and below the diagonal of the matrix), and also the diagonal of the matrix does not contain residuals. … If more than 50% [of residuals] are greater than 0.05 you probably have grounds for concern.
For our data, we have 22%, so we need not worry.
Another way to look at the residuals is to look at their mean. Rather than looking at the mean, we should square the residuals, find the mean, and then find the square root. This is the root-mean-square residual.
## [1] 0.04065398
If this [value] were much higher (say 0.08) we might want to consider extracting more factors.
We are below 0.08.
Finally, it’s worth looking at the distributions of the residuals – we expect the residuals to be approximately normally distributed – if there are any serious outliers, even if the other values are all good, we should probably look further into that.
This looks very well distributed.
The interpretability of factors can be improved through rotation. Rotation maximizes the loading of each variable on one of the extracted factors while minimizing the loading on all other factors. This process makes it much clearer which variables relate to which factors. Rotation works through changing the absolute values of the variables while keeping their differential values constant; … the exact choice of rotation will depend on whether or not you think that the underlying factors should be related. If there are theoretical grounds to think that the factors are independent (unrelated) then you should choose one of the orthogonal rotations (I recommend varimax). However, if theory suggests that your factors might correlate then one of the oblique rotations (oblimin or promax) should be selected.
# Maximum Likelihood Factor Analysis
# entering raw data and extracting 4 factors,
# with oblique (oblimin) rotation
# EFA (ml extraction) rather than PCA; oblimin allows the factors to
# correlate, consistent with theory about the need dimensions.
fit <- fa(data_EFA,
nfactors = 4,
rotate = "oblimin",
fm = "ml")## Loading required namespace: GPArotation
## Factor Analysis using method = ml
## Call: fa(r = data_EFA, nfactors = 4, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item ML1 ML2 ML3 ML4 h2 u2 com
## comp_growth2 36 0.76 0.65 0.35 1.0
## comp_growth5 43 0.75 0.70 0.30 1.1
## comp_growth6 44 0.75 0.65 0.35 1.1
## comp_growth7 47 0.73 0.66 0.34 1.1
## comp_growth9 50 0.71 0.63 0.37 1.1
## comp_growth1 33 0.69 0.60 0.40 1.1
## aut_growth7 29 0.69 0.59 0.41 1.3
## comp_growth3 38 0.68 0.59 0.41 1.1
## comp_growth8 49 0.64 0.56 0.44 1.2
## aut_growth2 17 0.62 0.49 0.51 1.1
## comp_growth4 40 0.61 0.57 0.43 1.5
## aut_growth3 20 0.57 0.53 0.47 1.5
## aut_growth4 21 0.53 0.44 0.56 1.6
## aut_growth1 15 0.53 0.32 0.48 0.52 1.8
## aut_growth5 22 0.49 0.41 0.59 1.7
## rel_deficit6 13 0.15 0.85 3.4
## rel_deficit5 9 0.76 0.59 0.41 1.2
## rel_deficit7 14 0.73 0.55 0.45 1.0
## rel_deficit2 3 0.72 0.53 0.47 1.2
## rel_deficit3 4 0.72 0.53 0.47 1.4
## rel_growth6 11 0.71 0.56 0.44 1.1
## rel_growth2 5 0.69 0.59 0.41 1.3
## rel_deficit1 1 0.68 0.49 0.51 1.1
## rel_growth7 12 0.66 0.44 0.56 1.1
## rel_growth4 8 0.66 0.51 0.49 1.2
## rel_growth5 10 0.64 0.55 0.45 1.6
## rel_deficit4 7 0.63 0.45 0.55 1.6
## rel_growth1 2 0.61 0.50 0.50 1.5
## rel_growth3 6 0.34 0.56 0.47 0.53 1.9
## aut_deficit9 31 0.77 0.64 0.36 1.0
## aut_deficit5 24 0.75 0.54 0.46 1.0
## aut_deficit2 18 0.74 0.48 0.52 1.2
## aut_deficit4 23 0.74 0.59 0.41 1.1
## aut_deficit3 19 0.72 0.61 0.39 1.1
## aut_deficit6 25 0.70 0.54 0.46 1.1
## aut_deficit10 32 0.60 0.58 0.42 1.3
## aut_deficit7 26 0.55 0.47 0.53 1.4
## aut_deficit1 16 0.51 0.41 0.59 1.5
## aut_growth8 30 0.41 0.44 0.43 0.57 2.1
## aut_growth6 28 0.35 0.37 0.42 0.58 3.0
## comp_deficit1 34 0.81 0.68 0.32 1.0
## comp_deficit7 45 0.78 0.68 0.32 1.1
## comp_deficit2 35 0.69 0.59 0.41 1.1
## comp_deficit8 46 0.36 0.57 0.57 0.43 1.8
## comp_deficit6 42 0.55 0.52 0.48 1.4
## comp_deficit4 39 0.52 0.45 0.55 1.7
## aut_deficit8 27 0.32 0.50 0.47 0.53 1.9
## comp_deficit9 48 0.36 0.49 0.55 0.45 1.9
## comp_deficit5 41 0.32 0.48 0.53 0.47 1.8
## comp_deficit3 37 0.33 0.45 0.51 0.49 2.0
##
## ML1 ML2 ML3 ML4
## SS loadings 9.19 6.58 6.04 4.97
## Proportion Var 0.18 0.13 0.12 0.10
## Cumulative Var 0.18 0.32 0.44 0.54
## Proportion Explained 0.34 0.25 0.23 0.19
## Cumulative Proportion 0.34 0.59 0.81 1.00
##
## With factor correlations of
## ML1 ML2 ML3 ML4
## ML1 1.00 0.25 0.31 0.39
## ML2 0.25 1.00 0.11 0.14
## ML3 0.31 0.11 1.00 0.36
## ML4 0.39 0.14 0.36 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 4 factors are sufficient.
##
## df null model = 1225 with the objective function = 35.38 with Chi Square = 16643.96
## df of the model are 1031 and the objective function was 6.33
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic n.obs is 489 with the empirical chi square 1624.53 with prob < 0.000000000000000000000000000025
## The total n.obs was 489 with Likelihood Chi Square = 2963.21 with prob < 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000057
##
## Tucker Lewis Index of factoring reliability = 0.85
## RMSEA index = 0.062 and the 90 % confidence intervals are 0.059 0.065
## BIC = -3421.11
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML1 ML2 ML3 ML4
## Correlation of (regression) scores with factors 0.97 0.96 0.96 0.95
## Multiple R square of scores with factors 0.95 0.93 0.92 0.91
## Minimum correlation of possible factor scores 0.90 0.86 0.84 0.82
# Refit with only 2 factors, for comparison with the 4-factor solution.
fit <- fa(data_EFA,
nfactors = 2,
rotate = "oblimin",
fm = "ml")
# Sort loadings and hide those below .3 for readability
print(fit, digits=2, sort=TRUE, cut = 0.3)## Factor Analysis using method = ml
## Call: fa(r = data_EFA, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item ML1 ML2 h2 u2 com
## comp_growth4 40 0.72 0.512 0.49 1.0
## comp_growth2 36 0.71 0.551 0.45 1.0
## comp_deficit9 48 0.71 0.493 0.51 1.0
## comp_growth7 47 0.70 0.571 0.43 1.1
## comp_growth9 50 0.70 0.552 0.45 1.1
## aut_growth1 15 0.69 0.447 0.55 1.0
## comp_growth1 33 0.69 0.539 0.46 1.0
## comp_deficit8 46 0.69 0.446 0.55 1.0
## comp_growth5 43 0.68 0.593 0.41 1.2
## comp_deficit3 37 0.67 0.470 0.53 1.0
## comp_deficit5 41 0.67 0.466 0.53 1.0
## comp_growth8 49 0.67 0.503 0.50 1.0
## aut_growth3 20 0.67 0.487 0.51 1.0
## aut_growth7 29 0.67 0.502 0.50 1.1
## aut_growth2 17 0.66 0.453 0.55 1.0
## comp_growth6 44 0.66 0.554 0.45 1.2
## comp_growth3 38 0.66 0.532 0.47 1.1
## aut_deficit10 32 0.65 0.403 0.60 1.0
## aut_growth4 21 0.64 0.403 0.60 1.0
## aut_deficit3 19 0.64 0.367 0.63 1.2
## aut_growth8 30 0.64 0.373 0.63 1.1
## comp_deficit2 35 0.63 0.373 0.63 1.0
## aut_deficit7 26 0.63 0.362 0.64 1.1
## comp_deficit7 45 0.62 0.364 0.64 1.0
## comp_deficit6 42 0.61 0.378 0.62 1.0
## aut_deficit4 23 0.60 0.329 0.67 1.2
## aut_deficit9 31 0.60 0.324 0.68 1.2
## comp_deficit1 34 0.60 0.333 0.67 1.0
## aut_deficit1 16 0.59 0.318 0.68 1.3
## aut_growth5 22 0.57 0.366 0.63 1.1
## aut_deficit6 25 0.53 0.254 0.75 1.2
## aut_deficit8 27 0.52 0.268 0.73 1.0
## comp_deficit4 39 0.51 0.244 0.76 1.0
## aut_deficit5 24 0.44 0.179 0.82 1.1
## aut_growth6 28 0.39 0.31 0.331 0.67 1.9
## aut_deficit2 18 0.070 0.93 1.1
## rel_growth2 5 0.77 0.604 0.40 1.0
## rel_growth6 11 0.75 0.568 0.43 1.0
## rel_growth5 10 0.74 0.550 0.45 1.0
## rel_deficit1 1 0.70 0.485 0.51 1.0
## rel_deficit7 14 0.70 0.494 0.51 1.0
## rel_growth4 8 0.69 0.516 0.48 1.0
## rel_growth7 12 0.69 0.447 0.55 1.0
## rel_growth1 2 0.68 0.505 0.49 1.0
## rel_deficit5 9 0.67 0.454 0.55 1.0
## rel_deficit2 3 0.65 0.411 0.59 1.0
## rel_growth3 6 0.64 0.464 0.54 1.1
## rel_deficit3 4 0.61 0.358 0.64 1.0
## rel_deficit4 7 0.53 0.284 0.72 1.0
## rel_deficit6 13 0.024 0.98 1.1
##
## ML1 ML2
## SS loadings 14.23 6.65
## Proportion Var 0.28 0.13
## Cumulative Var 0.28 0.42
## Proportion Explained 0.68 0.32
## Cumulative Proportion 0.68 1.00
##
## With factor correlations of
## ML1 ML2
## ML1 1.00 0.31
## ML2 0.31 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 1225 with the objective function = 35.38 with Chi Square = 16643.96
## df of the model are 1126 and the objective function was 13.13
##
## The root mean square of the residuals (RMSR) is 0.09
## The df corrected root mean square of the residuals is 0.09
##
## The harmonic n.obs is 489 with the empirical chi square 9503.66 with prob < 0
## The total n.obs was 489 with Likelihood Chi Square = 6159.1 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.644
## RMSEA index = 0.096 and the 90 % confidence intervals are 0.093 0.098
## BIC = -813.5
## Fit based upon off diagonal values = 0.93
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 0.98 0.96
## Multiple R square of scores with factors 0.96 0.93
## Minimum correlation of possible factor scores 0.93 0.86
# Refit with 6 factors (one per need x orientation combination) — earlier
# the Velicer MAP and BIC criteria pointed to 5-6 factors.
fit <- fa(data_EFA,
nfactors = 6,
rotate = "oblimin",
fm = "ml")
# Sort loadings and hide those below .3 for readability
print(fit, digits=2, sort=TRUE, cut = 0.3)## Factor Analysis using method = ml
## Call: fa(r = data_EFA, nfactors = 6, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item ML1 ML3 ML4 ML2 ML6 ML5 h2 u2 com
## comp_growth5 43 0.85 0.75 0.25 1.0
## comp_growth3 38 0.84 0.66 0.34 1.0
## comp_growth2 36 0.82 0.68 0.32 1.0
## comp_growth6 44 0.80 0.68 0.32 1.0
## comp_growth7 47 0.76 0.68 0.32 1.0
## comp_growth1 33 0.73 0.62 0.38 1.0
## comp_growth9 50 0.71 0.65 0.35 1.1
## comp_growth8 49 0.68 0.57 0.43 1.1
## aut_growth7 29 0.50 0.59 0.41 2.2
## aut_growth3 20 0.47 0.53 0.47 2.0
## aut_growth6 28 0.44 0.40 0.48 0.52 3.2
## comp_growth4 40 0.36 0.34 0.32 0.61 0.39 3.2
## aut_deficit5 24 0.81 0.60 0.40 1.1
## aut_deficit9 31 0.77 0.66 0.34 1.1
## aut_deficit2 18 0.74 0.53 0.47 1.2
## aut_deficit3 19 0.70 0.64 0.36 1.4
## aut_deficit6 25 0.67 0.54 0.46 1.1
## aut_deficit4 23 0.65 0.31 0.61 0.39 1.4
## aut_deficit10 32 0.54 0.57 0.43 1.6
## aut_deficit7 26 0.53 0.47 0.53 1.4
## aut_deficit1 16 0.48 0.42 0.58 1.6
## comp_deficit1 34 0.86 0.70 0.30 1.0
## comp_deficit7 45 0.85 0.71 0.29 1.0
## comp_deficit2 35 0.73 0.60 0.40 1.0
## comp_deficit8 46 0.60 0.58 0.42 1.3
## comp_deficit9 48 0.52 0.59 0.41 1.8
## comp_deficit6 42 0.51 0.52 0.48 1.6
## comp_deficit3 37 0.48 0.55 0.45 1.9
## comp_deficit4 39 0.34 0.43 0.51 0.49 3.4
## comp_deficit5 41 0.40 0.42 0.54 0.46 2.2
## aut_deficit8 27 0.30 0.41 0.50 0.50 3.3
## rel_growth2 5 0.82 0.68 0.32 1.0
## rel_growth5 10 0.76 0.64 0.36 1.1
## rel_growth1 2 0.75 0.58 0.42 1.0
## rel_growth3 6 0.69 0.53 0.47 1.0
## rel_growth7 12 0.66 0.48 0.52 1.1
## rel_growth6 11 0.55 0.56 0.44 1.5
## rel_growth4 8 0.51 0.51 0.49 1.6
## rel_deficit1 1 0.47 0.30 0.48 0.52 1.8
## rel_deficit3 4 0.85 0.68 0.32 1.0
## rel_deficit5 9 0.78 0.70 0.30 1.1
## rel_deficit2 3 0.56 0.54 0.46 1.5
## rel_deficit4 7 0.53 0.46 0.54 1.5
## rel_deficit7 14 0.33 0.49 0.56 0.44 1.8
## rel_deficit6 13 0.33 0.18 0.82 2.3
## aut_growth4 21 0.59 0.58 0.42 1.3
## aut_growth1 15 0.59 0.61 0.39 1.4
## aut_growth5 22 0.53 0.52 0.48 1.5
## aut_growth8 30 0.30 0.43 0.48 0.52 2.3
## aut_growth2 17 0.33 0.41 0.54 0.46 2.2
##
## ML1 ML3 ML4 ML2 ML6 ML5
## SS loadings 7.54 5.41 4.89 4.56 3.35 2.92
## Proportion Var 0.15 0.11 0.10 0.09 0.07 0.06
## Cumulative Var 0.15 0.26 0.36 0.45 0.51 0.57
## Proportion Explained 0.26 0.19 0.17 0.16 0.12 0.10
## Cumulative Proportion 0.26 0.45 0.62 0.78 0.90 1.00
##
## With factor correlations of
## ML1 ML3 ML4 ML2 ML6 ML5
## ML1 1.00 0.34 0.52 0.40 0.16 0.47
## ML3 0.34 1.00 0.34 0.03 0.20 0.28
## ML4 0.52 0.34 1.00 0.08 0.17 0.28
## ML2 0.40 0.03 0.08 1.00 0.53 0.14
## ML6 0.16 0.20 0.17 0.53 1.00 -0.10
## ML5 0.47 0.28 0.28 0.14 -0.10 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 6 factors are sufficient.
##
## df null model = 1225 with the objective function = 35.38 with Chi Square = 16643.96
## df of the model are 940 and the objective function was 4.57
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic n.obs is 489 with the empirical chi square 890.48 with prob < 0.87
## The total n.obs was 489 with Likelihood Chi Square = 2131.72 with prob < 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000033
##
## Tucker Lewis Index of factoring reliability = 0.898
## RMSEA index = 0.051 and the 90 % confidence intervals are 0.048 0.054
## BIC = -3689.1
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML1 ML3 ML4 ML2 ML6 ML5
## Correlation of (regression) scores with factors 0.97 0.96 0.96 0.95 0.94 0.91
## Multiple R square of scores with factors 0.95 0.92 0.92 0.91 0.89 0.83
## Minimum correlation of possible factor scores 0.90 0.83 0.83 0.82 0.77 0.67
Let’s attempt a higher-structure factor model using the
fa.multi function.
# Hierarchical (two-level) EFA: 6 first-order factors summarized by
# 2 second-order factors (psych::fa.multi).
multi.results <- fa.multi(
data_EFA,
nfactors = 6,
nfact2 = 2,
fm = "ml",
rotate = "oblimin"
)
# Label the two higher-order factors for the diagram
colnames(multi.results$f2$loadings) <- c(
"Competence-\nAutonomy", "Relatedness")
# Draw the hierarchical structure; loadings below `cut` are hidden
fa.multi.diagram(
multi.results,
flabels = c("Competence-\nGrowth", "Autonomy-\nDeficit",
"Competence-\nDeficit", "Relatedness-\nGrowth",
"Relatedness-\nDeficit", "Autonomy-\nGrowth"),
e.size = .09,
main = "Hierarchical (multilevel) Exploratory Structure of the Needs Orientation Scale",
color.lines = FALSE,
cut = 0.4
)So far, so good. Lavigne (2011) writes:
Based on the analysis we eliminated items that loaded on both factors and those that showed weak factor loadings. We selected a total of 10 items (5 for each factor) having the highest loadings on the hypothesized factors and adequately measuring the proposed constructs. A second exploratory factor analysis was then conducted on the final set of 10 items.
Let’s do like Lavigne and only keep the five best items that don’t load on several factors.
# Retain the five best-loading items per subscale that do not cross-load
# (following Lavigne's item-selection strategy). Item numbers are listed
# in the same order as the six-factor loadings above; column order is
# preserved exactly.
items_5 <- c(
  paste0("comp_growth", c(5, 3, 2, 6, 7)),
  paste0("aut_deficit", c(5, 9, 2, 3, 6)),
  paste0("comp_deficit", c(1, 7, 2, 8, 9)),
  paste0("rel_growth", c(2, 5, 1, 3, 7)),
  paste0("rel_deficit", c(3, 5, 2, 4, 7)),
  paste0("aut_growth", c(1, 4, 5, 8, 2))
)
data_EFA_5 <- data_EFA %>%
  select(all_of(items_5))
# Rerun the hierarchical EFA on the reduced 30-item set
multi.results <- fa.multi(
data_EFA_5,
nfactors = 6,
nfact2 = 2,
fm = "ml",
rotate = "oblimin"
)
# Label the two higher-order factors for the diagram
colnames(multi.results$f2$loadings) <- c(
"Competence-\nAutonomy", "Relatedness")
# Human-readable names for the six first-order factors
needs_dims <- c(
"Competence-Growth", "Relatedness-Growth",
"Competence-Deficit", "Autonomy-Deficit",
"Autonomy-Growth", "Relatedness-Deficit")
rownames(multi.results$f2$loadings) <- needs_dims
fa.multi.diagram(
multi.results,
# Insert line breaks so the labels fit inside the diagram ellipses
flabels = gsub("-", "-\n", needs_dims),
e.size = .09,
main = "Hierarchical (multilevel) Exploratory Structure of the Needs Orientation Scale",
color.lines = FALSE,
cut = 0.4,
gcut = 0.1
)Let’s check the suitability of this new solution.
# OK (no r > .9, no item with no r > .3)
# Export the item correlation matrix to Excel for inspection
# (rempsyc::cormatrix_excel) — suitability check for factor analysis.
data_EFA_5 %>%
cormatrix_excel("items_matrix_5-item", print.mat = FALSE)##
##
## [Correlation matrix 'items_matrix_5-item.xlsx' has been saved to working directory (or where specified).]
## Warning in xl_open.default(paste0(filename, ".xlsx")): will not open file when
## not interactive
## NULL
## R was not square, finding R from data
## $chisq
## [1] 8913.115
##
## $p.value
## [1] 0
##
## $df
## [1] 435
## [1] 0.92
## comp_growth7 aut_growth8 comp_deficit9 comp_growth2 comp_growth3
## 0.95 0.95 0.95 0.94 0.94
## rel_growth3 aut_growth1 aut_growth2 comp_growth6 comp_deficit8
## 0.94 0.94 0.94 0.94 0.94
## aut_growth4 comp_growth5 comp_deficit2 aut_growth5 aut_deficit6
## 0.93 0.93 0.93 0.93 0.93
## rel_growth5 rel_growth7 rel_growth1 rel_deficit7 rel_deficit2
## 0.93 0.92 0.92 0.92 0.91
## rel_growth2 aut_deficit3 aut_deficit9 rel_deficit4 comp_deficit1
## 0.91 0.91 0.90 0.90 0.89
## comp_deficit7 aut_deficit5 rel_deficit5 rel_deficit3 aut_deficit2
## 0.89 0.87 0.87 0.87 0.82
# Better than before but still not > 0.00001
# Determinant of the item correlation matrix: values above 0.00001
# indicate no problematic multicollinearity among items.
r_matrix <- cor(data_EFA_5, use = "pairwise.complete.obs")
det(r_matrix)## [1] 0.000000007721561
## [1] 0.9593743
## [1] 0.7911621
Are these the same five relatedness items identified by Lavigne?
# Relatedness Growth
# Display the item wording (labels) for the five retained items
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(rel_growth1, rel_growth5, rel_growth2,
rel_growth3, rel_growth7)| rel_growth1 | rel_growth5 | rel_growth2 | rel_growth3 | rel_growth7 |
|---|---|---|---|---|
| I find it exciting to discuss with people on numerous topics. | I have a sincere interest in others. | I consider that the people I meet are fascinating. | they allow me to discover a lot about others. | being with others is a leasure for me. |
# Relatedness Deficit-Reduction
# Display the item wording (labels) for the retained items
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(rel_deficit5, rel_deficit3, rel_deficit4,
rel_deficit7)| rel_deficit5 | rel_deficit3 | rel_deficit4 | rel_deficit7 |
|---|---|---|---|
| it appeases me to feel accepted. | I need to feel accepted. | I don’t want to be alone. | it makes me feel secure to know others’ opinions. |
There are some differences from Lavigne, who used a PCA rather than an EFA. For growth, 4 out of 5 items are the same, but for Lavigne the last was “they allow me to learn about myself” (loading = .61 vs .51 for us), while for us the 5th was “being with others is a leasure for me” (loading = .66 vs. .63 for them). Lavigne had chosen to exclude this item probably because there was a small (.23) shared loading on deficit-reduction, but in our case this one is only .10, so I don’t think it’s worth it to exclude it on this basis.
Same thing for deficit-reduction, the 5th item differs. For Lavigne it was “it gives me a frame of reference for the important decisions I have to make” (loading = .46 vs .30 for us), while for us it is “it makes me feel secure to know others’ opinions” (loading = .49 vs. .42 for them). Lavigne had once again chosen to exclude this item probably because there was a small (.29) shared loading on deficit-reduction, and this is also our case (.33), but it still seems better to me than the other options if you want to have 5 items.
Competence:
# Competence Growth
# Display the item wording (labels) for the five retained items
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(comp_growth3, comp_growth2, comp_growth5,
comp_growth6, comp_growth7)| comp_growth3 | comp_growth2 | comp_growth5 | comp_growth6 | comp_growth7 |
|---|---|---|---|---|
| I find it fascinating to develop my competences, my abilities. | I like to discover what my abilities in different domains are. | being skilled in a domain enables me to learn more about myself. | I consider that exploring different domains of competence enables me to evolve as an individual. | I like to discover what my strengths are. |
# Competence Deficit-Reduction
# Display the item wording (labels) for the five retained items
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(comp_deficit1, comp_deficit7, comp_deficit2,
comp_deficit8, comp_deficit9)| comp_deficit1 | comp_deficit7 | comp_deficit2 | comp_deficit8 | comp_deficit9 |
|---|---|---|---|---|
| I don’t want to look incompetent. | I don’t want to be perceived as incompetent. | I don’t want to be the one who is not competent. | I don’t want to be unskilled at what I do. | it appeases me to feel that I am good, competent. |
Autonomy:
# Autonomy Growth
# Display the item wording (labels) for the five retained items
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(aut_growth4, aut_growth5, aut_growth1,
aut_growth2, aut_growth8)| aut_growth4 | aut_growth5 | aut_growth1 | aut_growth2 | aut_growth8 |
|---|---|---|---|---|
| I like to choose what interests me before acting. | I like to reflect on what I wish for in order to make reasoned decisions. | I like doing things at my own pace. | I like to explore the different choices I have before making a decision. | I like to feel autonomous. |
# Autonomy Deficit-Reduction
# Display the item wording (labels) for the five retained items
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(aut_deficit5, aut_deficit9, aut_deficit2,
aut_deficit3, aut_deficit6)| aut_deficit5 | aut_deficit9 | aut_deficit2 | aut_deficit3 | aut_deficit6 |
|---|---|---|---|---|
| I don’t tolerate being told what to do. | I hate when others make decisions for me. | I don’t really tolerate authority figures. | I don’t like when people decide for me. | I don’t want to be supervised. |
Lavigne then proceeds to do the CFA, so let’s go to that step next.
In this section, we perform the Confirmatory Factor Analysis.
# Prepare the CFA dataset: strip the "needs_" prefix, keep only the scale
# items, and drop previously flagged rows.
# NOTE(review): `row_indices` is defined in an earlier (exclusions) chunk
# of this report — confirm it holds the rows to exclude.
data_CFA <- data %>%
rename_with(~gsub("needs_", "", .)) %>%
select(rel_deficit1:comp_growth9) %>%
slice(-row_indices)
# Prefixes for the three basic psychological needs
needs <- c("comp", "rel", "aut")
# Measurement model: six first-order latent factors (5 items each) plus
# two second-order factors (growth, deficit) defined by the need-level
# growth/deficit factors.
latent <- list(comp_growth = paste0("comp_growth", c(2:3, 5:7)),
rel_growth = paste0("rel_growth", c(1:3, 5, 7)),
aut_growth = paste0("aut_growth", c(1:2, 4:5, 8)),
comp_deficit = paste0("comp_deficit", c(1:2, 7:9)),
rel_deficit = paste0("rel_deficit", c(2:5, 7)),
aut_deficit = paste0("aut_deficit", c(2:3, 5:6, 9)),
growth = paste0(needs, "_growth"),
deficit = paste0(needs, "_deficit"))
# Only the higher-order covariance (growth ~~ deficit) is estimated;
# alternative covariance structures were explored and left commented out.
covariance <- list(growth = "deficit"#,
#aut_growth = "aut_deficit",
#comp_growth = "comp_deficit",
#rel_growth = "rel_deficit"#,
# aut_deficit = "rel_deficit",
# comp_deficit = "rel_deficit",
# aut_deficit = "comp_deficit"
)
# Generate the lavaan model syntax (lavaanExtra::write_lavaan)
cfa.model <- write_lavaan(latent = latent, covariance = covariance)
cat(cfa.model)## ##################################################
## # [-----Latent variables (measurement model)-----]
##
## comp_growth =~ comp_growth2 + comp_growth3 + comp_growth5 + comp_growth6 + comp_growth7
## rel_growth =~ rel_growth1 + rel_growth2 + rel_growth3 + rel_growth5 + rel_growth7
## aut_growth =~ aut_growth1 + aut_growth2 + aut_growth4 + aut_growth5 + aut_growth8
## comp_deficit =~ comp_deficit1 + comp_deficit2 + comp_deficit7 + comp_deficit8 + comp_deficit9
## rel_deficit =~ rel_deficit2 + rel_deficit3 + rel_deficit4 + rel_deficit5 + rel_deficit7
## aut_deficit =~ aut_deficit2 + aut_deficit3 + aut_deficit5 + aut_deficit6 + aut_deficit9
## growth =~ comp_growth + rel_growth + aut_growth
## deficit =~ comp_deficit + rel_deficit + aut_deficit
##
## ##################################################
## # [------------------Covariances-----------------]
##
## growth ~~ deficit
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite;
## use lavInspect(fit, "cov.lv") to investigate.
## lavaan 0.6.15 ended normally after 56 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 67
##
## Number of observations 490
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 1653.371 1372.695
## Degrees of freedom 398 398
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 1.204
## Yuan-Bentler correction (Mplus variant)
##
## Model Test Baseline Model:
##
## Test statistic 9889.513 7840.566
## Degrees of freedom 435 435
## P-value 0.000 0.000
## Scaling correction factor 1.261
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.867 0.868
## Tucker-Lewis Index (TLI) 0.855 0.856
##
## Robust Comparative Fit Index (CFI) 0.874
## Robust Tucker-Lewis Index (TLI) 0.863
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -22736.522 -22736.522
## Scaling correction factor 1.452
## for the MLR correction
## Loglikelihood unrestricted model (H1) -21909.836 -21909.836
## Scaling correction factor 1.240
## for the MLR correction
##
## Akaike (AIC) 45607.044 45607.044
## Bayesian (BIC) 45888.069 45888.069
## Sample-size adjusted Bayesian (SABIC) 45675.412 45675.412
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.080 0.071
## 90 Percent confidence interval - lower 0.076 0.067
## 90 Percent confidence interval - upper 0.084 0.074
## P-value H_0: RMSEA <= 0.050 0.000 0.000
## P-value H_0: RMSEA >= 0.080 0.543 0.000
##
## Robust RMSEA 0.078
## 90 Percent confidence interval - lower 0.073
## 90 Percent confidence interval - upper 0.082
## P-value H_0: Robust RMSEA <= 0.050 0.000
## P-value H_0: Robust RMSEA >= 0.080 0.191
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.111 0.111
##
## Parameter Estimates:
##
## Standard errors Sandwich
## Information bread Observed
## Observed information based on Hessian
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## comp_growth =~
## comp_growth2 1.000 1.109 0.828
## comp_growth3 1.011 0.043 23.394 0.000 1.121 0.821
## comp_growth5 1.085 0.047 23.234 0.000 1.203 0.833
## comp_growth6 1.026 0.050 20.471 0.000 1.137 0.828
## comp_growth7 1.004 0.045 22.545 0.000 1.113 0.842
## rel_growth =~
## rel_growth1 1.000 1.203 0.761
## rel_growth2 1.030 0.056 18.543 0.000 1.239 0.814
## rel_growth3 0.916 0.065 14.009 0.000 1.102 0.752
## rel_growth5 1.068 0.058 18.530 0.000 1.285 0.831
## rel_growth7 0.949 0.061 15.473 0.000 1.142 0.694
## aut_growth =~
## aut_growth1 1.000 0.796 0.694
## aut_growth2 1.223 0.090 13.545 0.000 0.974 0.778
## aut_growth4 1.275 0.082 15.614 0.000 1.015 0.828
## aut_growth5 1.291 0.102 12.699 0.000 1.028 0.786
## aut_growth8 1.087 0.079 13.841 0.000 0.865 0.615
## comp_deficit =~
## comp_deficit1 1.000 1.306 0.843
## comp_deficit2 0.928 0.045 20.681 0.000 1.212 0.816
## comp_deficit7 1.025 0.034 30.418 0.000 1.339 0.888
## comp_deficit8 0.778 0.057 13.541 0.000 1.016 0.768
## comp_deficit9 0.696 0.059 11.772 0.000 0.909 0.728
## rel_deficit =~
## rel_deficit2 1.000 1.170 0.699
## rel_deficit3 1.279 0.084 15.244 0.000 1.497 0.828
## rel_deficit4 1.029 0.063 16.429 0.000 1.204 0.663
## rel_deficit5 1.213 0.081 14.919 0.000 1.419 0.842
## rel_deficit7 1.030 0.065 15.872 0.000 1.205 0.725
## aut_deficit =~
## aut_deficit2 1.000 1.166 0.615
## aut_deficit3 1.036 0.105 9.903 0.000 1.208 0.769
## aut_deficit5 1.198 0.075 15.878 0.000 1.397 0.769
## aut_deficit6 1.084 0.090 12.100 0.000 1.264 0.718
## aut_deficit9 1.251 0.114 10.974 0.000 1.459 0.829
## growth =~
## comp_growth 1.000 0.859 0.859
## rel_growth 0.753 0.073 10.379 0.000 0.596 0.596
## aut_growth 0.648 0.089 7.293 0.000 0.775 0.775
## deficit =~
## comp_deficit 1.000 0.688 0.688
## rel_deficit 0.520 0.095 5.474 0.000 0.399 0.399
## aut_deficit 0.652 0.125 5.221 0.000 0.502 0.502
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## growth ~~
## deficit 0.863 0.094 9.175 0.000 1.009 1.009
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .comp_growth2 0.562 0.058 9.620 0.000 0.562 0.314
## .comp_growth3 0.606 0.073 8.272 0.000 0.606 0.325
## .comp_growth5 0.640 0.069 9.213 0.000 0.640 0.307
## .comp_growth6 0.592 0.057 10.324 0.000 0.592 0.314
## .comp_growth7 0.507 0.054 9.372 0.000 0.507 0.290
## .rel_growth1 1.055 0.115 9.156 0.000 1.055 0.421
## .rel_growth2 0.780 0.075 10.382 0.000 0.780 0.337
## .rel_growth3 0.934 0.115 8.158 0.000 0.934 0.435
## .rel_growth5 0.741 0.073 10.206 0.000 0.741 0.310
## .rel_growth7 1.403 0.119 11.747 0.000 1.403 0.518
## .aut_growth1 0.684 0.077 8.872 0.000 0.684 0.519
## .aut_growth2 0.619 0.071 8.686 0.000 0.619 0.395
## .aut_growth4 0.474 0.059 8.074 0.000 0.474 0.315
## .aut_growth5 0.653 0.090 7.227 0.000 0.653 0.382
## .aut_growth8 1.228 0.115 10.675 0.000 1.228 0.621
## .comp_deficit1 0.696 0.099 7.024 0.000 0.696 0.290
## .comp_deficit2 0.737 0.101 7.285 0.000 0.737 0.334
## .comp_deficit7 0.478 0.064 7.469 0.000 0.478 0.211
## .comp_deficit8 0.719 0.079 9.122 0.000 0.719 0.410
## .comp_deficit9 0.734 0.082 8.953 0.000 0.734 0.471
## .rel_deficit2 1.432 0.129 11.062 0.000 1.432 0.511
## .rel_deficit3 1.025 0.111 9.198 0.000 1.025 0.314
## .rel_deficit4 1.847 0.161 11.478 0.000 1.847 0.560
## .rel_deficit5 0.824 0.101 8.154 0.000 0.824 0.290
## .rel_deficit7 1.313 0.126 10.381 0.000 1.313 0.475
## .aut_deficit2 2.238 0.171 13.126 0.000 2.238 0.622
## .aut_deficit3 1.010 0.110 9.139 0.000 1.010 0.409
## .aut_deficit5 1.350 0.138 9.782 0.000 1.350 0.409
## .aut_deficit6 1.499 0.134 11.197 0.000 1.499 0.484
## .aut_deficit9 0.966 0.126 7.665 0.000 0.966 0.312
## .comp_growth 0.322 0.089 3.621 0.000 0.262 0.262
## .rel_growth 0.933 0.140 6.656 0.000 0.644 0.644
## .aut_growth 0.253 0.058 4.402 0.000 0.400 0.400
## .comp_deficit 0.899 0.168 5.340 0.000 0.527 0.527
## .rel_deficit 1.152 0.145 7.928 0.000 0.841 0.841
## .aut_deficit 1.017 0.195 5.224 0.000 0.748 0.748
## growth 0.907 0.125 7.248 0.000 1.000 1.000
## deficit 0.806 0.160 5.036 0.000 1.000 1.000
##
## R-Square:
## Estimate
## comp_growth2 0.686
## comp_growth3 0.675
## comp_growth5 0.693
## comp_growth6 0.686
## comp_growth7 0.710
## rel_growth1 0.579
## rel_growth2 0.663
## rel_growth3 0.565
## rel_growth5 0.690
## rel_growth7 0.482
## aut_growth1 0.481
## aut_growth2 0.605
## aut_growth4 0.685
## aut_growth5 0.618
## aut_growth8 0.379
## comp_deficit1 0.710
## comp_deficit2 0.666
## comp_deficit7 0.789
## comp_deficit8 0.590
## comp_deficit9 0.529
## rel_deficit2 0.489
## rel_deficit3 0.686
## rel_deficit4 0.440
## rel_deficit5 0.710
## rel_deficit7 0.525
## aut_deficit2 0.378
## aut_deficit3 0.591
## aut_deficit5 0.591
## aut_deficit6 0.516
## aut_deficit9 0.688
## comp_growth 0.738
## rel_growth 0.356
## aut_growth 0.600
## comp_deficit 0.473
## rel_deficit 0.159
## aut_deficit 0.252
Model | χ2 | df | χ2∕df | p | CFI | TLI | RMSEA [90% CI] | SRMR | AIC | BIC |
|---|---|---|---|---|---|---|---|---|---|---|
Model 1 | 1,653.37 | 398 | 4.15 | < .001 | .87 | .85 | .08 [.08, .08] | .11 | 45,607.04 | 45,888.07 |
Ideal Valuea | — | — | < 2 or 3 | > .05 | ≥ .95 | ≥ .95 | < .05 [.00, .08] | ≤ .08 | Smaller | Smaller |
aAs proposed by Schreiber (2017). | ||||||||||
In the end, the fit indices are not excellent. Let’s have a look at the modification indices.
## Warning in lav_start_check_cov(lavpartable = lavpartable, start = START): lavaan WARNING: starting values imply a correlation larger than 1;
## variables involved are: growth deficit
# Strip the "needs_" prefix from the label names so they match the shortened
# item names used in the CFA model (and in the modification-indices table).
dataset.labels <- dataset.labels %>%
rename_with(~gsub(pattern = "needs_", replacement = "", .x),
starts_with("needs"))
add_item_labels <- function(x, labels = data_labels, method = "lcs") {
  # Attach human-readable item labels to a lavaan modification-indices table
  # and flag item pairs whose wordings are suspiciously similar.
  #
  # x      : data frame with at least `lhs`, `op`, `rhs`, `mi` columns
  #          (e.g. the output of lavaan::modindices()).
  # labels : named collection mapping item names to their label text
  #          (here, a one-row data frame of item wordings).
  # method : string-distance method passed to stringdist::stringsim().
  #
  # Returns `x` reduced to lhs/op/rhs/mi plus label columns, a `similarity`
  # score in [0, 1], and a logical `similar` flag (similarity > .50; NA,
  # i.e. a missing label, counts as FALSE).
  for (i in seq_len(nrow(x))) {  # seq_len(): safe when x has zero rows
    x[i, "lhs.labels"] <- ifelse(
      x$lhs[i] %in% names(labels),
      labels[which(x$lhs[i] == names(labels))],
      NA)
    x[i, "rhs.labels"] <- ifelse(
      x$rhs[i] %in% names(labels),
      labels[which(x$rhs[i] == names(labels))],
      NA)
  }
  # Select columns by name rather than position so the function does not
  # break if modindices() adds or drops intermediate columns.
  x <- x[c("lhs", "op", "rhs", "mi", "lhs.labels", "rhs.labels")]
  # Bug fix: `method` was previously declared but never forwarded, so
  # stringsim() silently used its own default method instead of "lcs".
  x$similarity <- stringdist::stringsim(x$lhs.labels, x$rhs.labels,
                                        method = method)
  x$similar <- !is.na(x$similarity) & x$similarity > .50
  rownames(x) <- NULL
  x
}
add_item_labels(x, labels = dataset.labels)| lhs | op | rhs | mi | lhs.labels | rhs.labels | similarity | similar |
|---|---|---|---|---|---|---|---|
| rel_growth | ~~ | rel_deficit | 169.26158 | NA | NA | NA | FALSE |
| aut_growth | ~~ | aut_deficit | 97.13119 | NA | NA | NA | FALSE |
| comp_deficit1 | ~~ | comp_deficit7 | 96.28469 | I don’t want to look incompetent. | I don’t want to be perceived as incompetent. | 0.6590909 | TRUE |
| comp_growth | =~ | comp_deficit8 | 73.55686 | NA | I don’t want to be unskilled at what I do. | NA | FALSE |
| comp_growth | =~ | comp_deficit9 | 72.44432 | NA | it appeases me to feel that I am good, competent. | NA | FALSE |
| aut_deficit2 | ~~ | aut_deficit5 | 69.31519 | I don’t really tolerate authority figures. | I don’t tolerate being told what to do. | 0.3571429 | FALSE |
| growth | =~ | comp_deficit8 | 64.51417 | NA | I don’t want to be unskilled at what I do. | NA | FALSE |
| growth | =~ | comp_deficit9 | 64.09182 | NA | it appeases me to feel that I am good, competent. | NA | FALSE |
| deficit | =~ | comp_deficit9 | 58.14116 | NA | it appeases me to feel that I am good, competent. | NA | FALSE |
| deficit | =~ | comp_deficit8 | 51.95646 | NA | I don’t want to be unskilled at what I do. | NA | FALSE |
The fact that it is suggested to have comp_deficit1
covary with comp_deficit7 suggests the two items may have
similar wordings.
Looking at the wordings, they are pretty similar indeed, which may suggest to choose a replacement item.
Furthermore, it is suggested to covary rel_growth and
rel_deficit as well as aut_growth and
aut_deficit. However, doing so leads to loadings greater
than 1 (nonsensical). Perhaps replacing one of the two
comp_deficit items will improve this.
We could replace comp_deficit7 by
comp_deficit6, and allow some of the needs orientations to
covary:
## [1] "I have to feel competent."
# Swap the third comp_deficit item (comp_deficit7) for comp_deficit6,
# since comp_deficit7's wording overlaps too much with comp_deficit1.
latent$comp_deficit[3] <- "comp_deficit6"
# Additionally free the within-need growth/deficit covariances suggested
# by the modification indices (autonomy and relatedness).
covariance <- c(covariance, aut_growth = "aut_deficit", rel_growth = "rel_deficit")
cfa.model <- write_lavaan(latent = latent, covariance = covariance)
cat(cfa.model)## ##################################################
## # [-----Latent variables (measurement model)-----]
##
## comp_growth =~ comp_growth2 + comp_growth3 + comp_growth5 + comp_growth6 + comp_growth7
## rel_growth =~ rel_growth1 + rel_growth2 + rel_growth3 + rel_growth5 + rel_growth7
## aut_growth =~ aut_growth1 + aut_growth2 + aut_growth4 + aut_growth5 + aut_growth8
## comp_deficit =~ comp_deficit1 + comp_deficit2 + comp_deficit6 + comp_deficit8 + comp_deficit9
## rel_deficit =~ rel_deficit2 + rel_deficit3 + rel_deficit4 + rel_deficit5 + rel_deficit7
## aut_deficit =~ aut_deficit2 + aut_deficit3 + aut_deficit5 + aut_deficit6 + aut_deficit9
## growth =~ comp_growth + rel_growth + aut_growth
## deficit =~ comp_deficit + rel_deficit + aut_deficit
##
## ##################################################
## # [------------------Covariances-----------------]
##
## growth ~~ deficit
## aut_growth ~~ aut_deficit
## rel_growth ~~ rel_deficit
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated lv
## variances are negative
## lavaan 0.6.15 ended normally after 60 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 69
##
## Number of observations 490
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 1273.015 1049.642
## Degrees of freedom 396 396
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 1.213
## Yuan-Bentler correction (Mplus variant)
##
## Model Test Baseline Model:
##
## Test statistic 9613.071 7621.046
## Degrees of freedom 435 435
## P-value 0.000 0.000
## Scaling correction factor 1.261
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.904 0.909
## Tucker-Lewis Index (TLI) 0.895 0.900
##
## Robust Comparative Fit Index (CFI) 0.913
## Robust Tucker-Lewis Index (TLI) 0.904
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -22669.860 -22669.860
## Scaling correction factor 1.398
## for the MLR correction
## Loglikelihood unrestricted model (H1) -22033.353 -22033.353
## Scaling correction factor 1.240
## for the MLR correction
##
## Akaike (AIC) 45477.721 45477.721
## Bayesian (BIC) 45767.135 45767.135
## Sample-size adjusted Bayesian (SABIC) 45548.130 45548.130
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.067 0.058
## 90 Percent confidence interval - lower 0.063 0.054
## 90 Percent confidence interval - upper 0.071 0.062
## P-value H_0: RMSEA <= 0.050 0.000 0.000
## P-value H_0: RMSEA >= 0.080 0.000 0.000
##
## Robust RMSEA 0.064
## 90 Percent confidence interval - lower 0.059
## 90 Percent confidence interval - upper 0.069
## P-value H_0: Robust RMSEA <= 0.050 0.000
## P-value H_0: Robust RMSEA >= 0.080 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.072 0.072
##
## Parameter Estimates:
##
## Standard errors Sandwich
## Information bread Observed
## Observed information based on Hessian
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## comp_growth =~
## comp_growth2 1.000 1.109 0.829
## comp_growth3 1.014 0.044 23.238 0.000 1.124 0.824
## comp_growth5 1.078 0.046 23.655 0.000 1.195 0.827
## comp_growth6 1.027 0.050 20.624 0.000 1.139 0.829
## comp_growth7 1.006 0.044 22.694 0.000 1.115 0.844
## rel_growth =~
## rel_growth1 1.000 1.207 0.755
## rel_growth2 1.038 0.055 18.859 0.000 1.252 0.814
## rel_growth3 0.936 0.064 14.593 0.000 1.129 0.763
## rel_growth5 1.081 0.057 19.043 0.000 1.304 0.833
## rel_growth7 0.985 0.062 15.958 0.000 1.189 0.716
## aut_growth =~
## aut_growth1 1.000 0.789 0.695
## aut_growth2 1.195 0.086 13.854 0.000 0.943 0.763
## aut_growth4 1.247 0.078 15.918 0.000 0.984 0.815
## aut_growth5 1.256 0.097 13.012 0.000 0.991 0.769
## aut_growth8 1.112 0.078 14.252 0.000 0.878 0.630
## comp_deficit =~
## comp_deficit1 1.000 1.168 0.754
## comp_deficit2 1.009 0.042 23.949 0.000 1.179 0.793
## comp_deficit6 0.956 0.052 18.509 0.000 1.116 0.763
## comp_deficit8 0.883 0.061 14.360 0.000 1.032 0.779
## comp_deficit9 0.846 0.058 14.496 0.000 0.988 0.791
## rel_deficit =~
## rel_deficit2 1.000 1.247 0.735
## rel_deficit3 1.189 0.073 16.208 0.000 1.482 0.807
## rel_deficit4 1.002 0.058 17.238 0.000 1.249 0.680
## rel_deficit5 1.145 0.070 16.470 0.000 1.428 0.833
## rel_deficit7 1.019 0.058 17.652 0.000 1.270 0.753
## aut_deficit =~
## aut_deficit2 1.000 1.134 0.601
## aut_deficit3 1.064 0.103 10.328 0.000 1.207 0.775
## aut_deficit5 1.212 0.077 15.642 0.000 1.374 0.762
## aut_deficit6 1.107 0.090 12.266 0.000 1.255 0.719
## aut_deficit9 1.258 0.111 11.355 0.000 1.426 0.818
## growth =~
## comp_growth 1.000 0.914 0.914
## rel_growth 0.702 0.057 12.261 0.000 0.590 0.590
## aut_growth 0.566 0.070 8.077 0.000 0.727 0.727
## deficit =~
## comp_deficit 1.000 1.006 1.006
## rel_deficit 0.468 0.059 7.889 0.000 0.441 0.441
## aut_deficit 0.360 0.081 4.459 0.000 0.373 0.373
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## growth ~~
## deficit 0.931 0.088 10.617 0.000 0.781 0.781
## .aut_growth ~~
## .aut_deficit 0.329 0.055 5.964 0.000 0.578 0.578
## .rel_growth ~~
## .rel_deficit 0.828 0.101 8.218 0.000 0.759 0.759
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .comp_growth2 0.561 0.058 9.708 0.000 0.561 0.313
## .comp_growth3 0.598 0.072 8.278 0.000 0.598 0.321
## .comp_growth5 0.658 0.070 9.369 0.000 0.658 0.315
## .comp_growth6 0.590 0.057 10.349 0.000 0.590 0.313
## .comp_growth7 0.502 0.053 9.478 0.000 0.502 0.287
## .rel_growth1 1.095 0.111 9.884 0.000 1.095 0.429
## .rel_growth2 0.800 0.072 11.123 0.000 0.800 0.338
## .rel_growth3 0.916 0.111 8.281 0.000 0.916 0.418
## .rel_growth5 0.748 0.067 11.197 0.000 0.748 0.306
## .rel_growth7 1.341 0.115 11.702 0.000 1.341 0.487
## .aut_growth1 0.665 0.074 8.956 0.000 0.665 0.517
## .aut_growth2 0.636 0.072 8.797 0.000 0.636 0.417
## .aut_growth4 0.490 0.058 8.497 0.000 0.490 0.336
## .aut_growth5 0.680 0.093 7.300 0.000 0.680 0.409
## .aut_growth8 1.170 0.112 10.400 0.000 1.170 0.603
## .comp_deficit1 1.037 0.141 7.357 0.000 1.037 0.432
## .comp_deficit2 0.817 0.111 7.384 0.000 0.817 0.370
## .comp_deficit6 0.892 0.090 9.910 0.000 0.892 0.417
## .comp_deficit8 0.688 0.081 8.524 0.000 0.688 0.392
## .comp_deficit9 0.583 0.061 9.609 0.000 0.583 0.374
## .rel_deficit2 1.323 0.119 11.079 0.000 1.323 0.460
## .rel_deficit3 1.174 0.121 9.724 0.000 1.174 0.348
## .rel_deficit4 1.813 0.150 12.101 0.000 1.813 0.538
## .rel_deficit5 0.898 0.095 9.462 0.000 0.898 0.306
## .rel_deficit7 1.229 0.116 10.618 0.000 1.229 0.432
## .aut_deficit2 2.276 0.166 13.710 0.000 2.276 0.639
## .aut_deficit3 0.971 0.103 9.387 0.000 0.971 0.400
## .aut_deficit5 1.360 0.131 10.365 0.000 1.360 0.419
## .aut_deficit6 1.476 0.130 11.352 0.000 1.476 0.484
## .aut_deficit9 1.002 0.125 8.051 0.000 1.002 0.330
## .comp_growth 0.203 0.077 2.645 0.008 0.165 0.165
## .rel_growth 0.950 0.124 7.647 0.000 0.652 0.652
## .aut_growth 0.293 0.049 5.959 0.000 0.471 0.471
## .comp_deficit -0.017 0.146 -0.119 0.905 -0.013 -0.013
## .rel_deficit 1.252 0.131 9.585 0.000 0.806 0.806
## .aut_deficit 1.106 0.189 5.869 0.000 0.861 0.861
## growth 1.027 0.125 8.243 0.000 1.000 1.000
## deficit 1.382 0.210 6.589 0.000 1.000 1.000
##
## R-Square:
## Estimate
## comp_growth2 0.687
## comp_growth3 0.679
## comp_growth5 0.685
## comp_growth6 0.687
## comp_growth7 0.713
## rel_growth1 0.571
## rel_growth2 0.662
## rel_growth3 0.582
## rel_growth5 0.694
## rel_growth7 0.513
## aut_growth1 0.483
## aut_growth2 0.583
## aut_growth4 0.664
## aut_growth5 0.591
## aut_growth8 0.397
## comp_deficit1 0.568
## comp_deficit2 0.630
## comp_deficit6 0.583
## comp_deficit8 0.608
## comp_deficit9 0.626
## rel_deficit2 0.540
## rel_deficit3 0.652
## rel_deficit4 0.462
## rel_deficit5 0.694
## rel_deficit7 0.568
## aut_deficit2 0.361
## aut_deficit3 0.600
## aut_deficit5 0.581
## aut_deficit6 0.516
## aut_deficit9 0.670
## comp_growth 0.835
## rel_growth 0.348
## aut_growth 0.529
## comp_deficit NA
## rel_deficit 0.194
## aut_deficit 0.139
Model | χ2 | df | χ2∕df | p | CFI | TLI | RMSEA [90% CI] | SRMR | AIC | BIC |
|---|---|---|---|---|---|---|---|---|---|---|
Model 1 | 1,273.02 | 396 | 3.21 | < .001 | .90 | .90 | .07 [.06, .07] | .07 | 45,477.72 | 45,767.14 |
Ideal Valuea | — | — | < 2 or 3 | > .05 | ≥ .95 | ≥ .95 | < .05 [.00, .08] | ≤ .08 | Smaller | Smaller |
aAs proposed by Schreiber (2017). | ||||||||||
Let’s have a look at the modification indices again.
x <- modindices(fit.cfa, sort = TRUE, maximum.number = 10)
add_item_labels(x, labels = dataset.labels)| lhs | op | rhs | mi | lhs.labels | rhs.labels | similarity | similar |
|---|---|---|---|---|---|---|---|
| aut_deficit2 | ~~ | aut_deficit5 | 72.57109 | I don’t really tolerate authority figures. | I don’t tolerate being told what to do. | 0.3571429 | FALSE |
| comp_growth | =~ | comp_deficit2 | 57.89011 | NA | I don’t want to be the one who is not competent. | NA | FALSE |
| rel_deficit3 | ~~ | rel_deficit5 | 55.43022 | I need to feel accepted. | it appeases me to feel accepted. | 0.6562500 | TRUE |
| comp_deficit1 | ~~ | comp_deficit2 | 54.84207 | I don’t want to look incompetent. | I don’t want to be the one who is not competent. | 0.6250000 | TRUE |
| rel_growth | =~ | rel_deficit3 | 53.59392 | NA | I need to feel accepted. | NA | FALSE |
| deficit | =~ | comp_deficit2 | 47.42258 | NA | I don’t want to be the one who is not competent. | NA | FALSE |
| growth | =~ | comp_deficit2 | 45.99784 | NA | I don’t want to be the one who is not competent. | NA | FALSE |
| growth | =~ | comp_deficit8 | 43.48223 | NA | I don’t want to be unskilled at what I do. | NA | FALSE |
| comp_growth | =~ | comp_deficit1 | 40.83590 | NA | I don’t want to look incompetent. | NA | FALSE |
| growth | =~ | comp_deficit1 | 38.75260 | NA | I don’t want to look incompetent. | NA | FALSE |
In the end, the fit has not improved much, and we have still not
solved the issue with the model; now we get a warning from
lavaan, “some estimated lv variances are negative”, which
we would need to resolve in a different way.
This suggests the model might be misspecified, and a fix may be to collapse two factors together.
In particular, it might be due to the fact that during the EFA, the two factors that came up were not growth and deficit-reduction, but one “competence-autonomy” factor, and a “relatedness” factor.
Now that we have imputed the missing data, we are ready to calculate our scale means. After reversing our items, we can then get the alphas and omegas for our different scales.
# Get alpha & omega for growth
# Reliability (alpha, G.6, omega) across all growth items, with 3 group
# factors — one per need (competence, relatedness, autonomy).
data %>%
select(starts_with("needs_") & contains("growth")) %>%
omega(nfactors = 3)## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.94
## G.6: 0.96
## Omega Hierarchical: 0.8
## Omega H asymptotic: 0.84
## Omega Total 0.96
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* h2 u2 p2
## needs_rel_growth1 0.39 0.66 0.58 0.42 0.26
## needs_rel_growth2 0.40 0.71 0.67 0.33 0.24
## needs_rel_growth3 0.41 0.62 0.56 0.44 0.31
## needs_rel_growth4 0.39 0.59 0.50 0.50 0.30
## needs_rel_growth5 0.41 0.67 0.62 0.38 0.27
## needs_rel_growth6 0.30 0.66 0.53 0.47 0.17
## needs_rel_growth7 0.22 0.68 0.51 0.49 0.09
## needs_aut_growth1 0.48 0.56 0.55 0.45 0.43
## needs_aut_growth2 0.59 0.47 0.57 0.43 0.61
## needs_aut_growth3 0.66 0.31 0.53 0.47 0.81
## needs_aut_growth4 0.51 0.62 0.65 0.35 0.40
## needs_aut_growth5 0.51 0.57 0.59 0.41 0.44
## needs_aut_growth6 0.51 0.25 0.34 0.66 0.79
## needs_aut_growth7 0.71 0.29 0.60 0.40 0.84
## needs_aut_growth8 0.48 0.45 0.43 0.57 0.53
## needs_comp_growth1 0.78 0.60 0.40 1.00
## needs_comp_growth2 0.83 0.69 0.31 1.00
## needs_comp_growth3 0.81 0.66 0.34 1.00
## needs_comp_growth4 0.68 0.22 0.52 0.48 0.89
## needs_comp_growth5 0.85 0.72 0.28 0.99
## needs_comp_growth6 0.83 0.68 0.32 1.00
## needs_comp_growth7 0.84 0.71 0.29 1.00
## needs_comp_growth8 0.76 0.58 0.42 1.00
## needs_comp_growth9 0.83 0.68 0.32 1.00
##
## With Sums of squares of:
## g F1* F2* F3*
## 9.3 0.0 3.1 1.7
##
## general/max 2.98 max/min = Inf
## mean percent general = 0.64 with sd = 0.33 and cv of 0.52
## Explained Common Variance of the general factor = 0.66
##
## The degrees of freedom are 207 and the fit is 0.87
## The number of observations was 979 with Chi Square = 844.84 with prob < 0.0000000000000000000000000000000000000000000000000000000000000000000000000000065
## The root mean square of the residuals is 0.03
## The df corrected root mean square of the residuals is 0.03
## RMSEA index = 0.056 and the 10 % confidence intervals are 0.052 0.06
## BIC = -580.67
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 252 and the fit is 4.82
## The number of observations was 979 with Chi Square = 4666.46 with prob < 0
## The root mean square of the residuals is 0.14
## The df corrected root mean square of the residuals is 0.15
##
## RMSEA index = 0.134 and the 10 % confidence intervals are 0.13 0.137
## BIC = 2931.06
##
## Measures of factor score adequacy
## g F1* F2* F3*
## Correlation of scores with factors 0.98 0 0.93 0.88
## Multiple R square of scores with factors 0.95 0 0.87 0.77
## Minimum correlation of factor score estimates 0.90 -1 0.75 0.54
##
## Total, General and Subset omega for each subset
## g F1* F2* F3*
## Omega total for total scores and subscales 0.96 NA 0.92 0.94
## Omega general for total scores and subscales 0.80 NA 0.52 0.81
## Omega group for total scores and subscales 0.15 NA 0.40 0.13
# Get alpha & omega for deficit-reduction
# Reliability (alpha, G.6, omega) across all deficit-reduction items, with
# 3 group factors — one per need (competence, relatedness, autonomy).
data %>%
select(starts_with("needs_") & contains("deficit")) %>%
omega(nfactors = 3)## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.92
## G.6: 0.95
## Omega Hierarchical: 0.63
## Omega H asymptotic: 0.67
## Omega Total 0.94
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* h2 u2 p2
## needs_rel_deficit1 0.59 0.39 0.61 0.10
## needs_rel_deficit2 0.24 0.69 0.53 0.47 0.11
## needs_rel_deficit3 0.79 0.66 0.34 0.05
## needs_rel_deficit4 0.22 0.63 0.45 0.55 0.11
## needs_rel_deficit5 0.30 0.75 0.65 0.35 0.14
## needs_rel_deficit6 0.20 0.35 0.18 0.82 0.07
## needs_rel_deficit7 0.24 0.70 0.55 0.45 0.11
## needs_aut_deficit1 0.39 0.55 0.46 0.54 0.33
## needs_aut_deficit2 0.66 0.46 0.54 0.02
## needs_aut_deficit3 0.36 0.68 0.60 0.40 0.21
## needs_aut_deficit4 0.38 0.64 0.56 0.44 0.25
## needs_aut_deficit5 0.26 0.72 0.60 0.40 0.11
## needs_aut_deficit6 0.36 0.63 0.53 0.47 0.24
## needs_aut_deficit7 0.45 0.50 0.46 0.54 0.45
## needs_aut_deficit8 0.56 0.25 0.21 0.42 0.58 0.73
## needs_aut_deficit9 0.36 0.73 0.67 0.33 0.19
## needs_aut_deficit10 0.52 0.54 0.57 0.43 0.48
## needs_comp_deficit1 0.77 0.63 0.37 0.95
## needs_comp_deficit2 0.77 0.61 0.39 0.96
## needs_comp_deficit3 0.73 0.56 0.44 0.96
## needs_comp_deficit4 0.59 0.20 0.42 0.58 0.84
## needs_comp_deficit5 0.69 0.51 0.49 0.94
## needs_comp_deficit6 0.73 0.58 0.42 0.91
## needs_comp_deficit7 0.80 0.67 0.33 0.94
## needs_comp_deficit8 0.74 0.57 0.43 0.95
## needs_comp_deficit9 0.75 0.58 0.42 0.96
##
## With Sums of squares of:
## g F1* F2* F3*
## 6.71 0.19 3.80 3.17
##
## general/max 1.77 max/min = 19.51
## mean percent general = 0.47 with sd = 0.38 and cv of 0.81
## Explained Common Variance of the general factor = 0.48
##
## The degrees of freedom are 250 and the fit is 1.8
## The number of observations was 979 with Chi Square = 1742.92 with prob < 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001
## The root mean square of the residuals is 0.04
## The df corrected root mean square of the residuals is 0.04
## RMSEA index = 0.078 and the 10 % confidence intervals are 0.075 0.082
## BIC = 21.29
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 299 and the fit is 7.76
## The number of observations was 979 with Chi Square = 7507.45 with prob < 0
## The root mean square of the residuals is 0.19
## The df corrected root mean square of the residuals is 0.19
##
## RMSEA index = 0.157 and the 10 % confidence intervals are 0.154 0.16
## BIC = 5448.37
##
## Measures of factor score adequacy
## g F1* F2* F3*
## Correlation of scores with factors 0.94 0.21 0.93 0.93
## Multiple R square of scores with factors 0.89 0.04 0.87 0.87
## Minimum correlation of factor score estimates 0.79 -0.91 0.74 0.74
##
## Total, General and Subset omega for each subset
## g F1* F2* F3*
## Omega total for total scores and subscales 0.94 0.91 0.91 0.86
## Omega general for total scores and subscales 0.63 0.87 0.36 0.09
## Omega group for total scores and subscales 0.27 0.03 0.55 0.77
# Compute scale means, using only the items retained through the CFA.
# The item subsets below must stay in sync with the CFA `latent`
# specification. (The original code repeated the same mutate/rowMeans
# pattern five times, and one copy-pasted comment mislabelled the
# comp_deficit computation as "aut.deficit".)
cfa_item_sets <- list(
  aut_growth   = paste0("needs_aut_growth", c(1:2, 4:5, 8)),
  aut_deficit  = paste0("needs_aut_deficit", c(2:3, 5:6, 9)),
  comp_growth  = paste0("needs_comp_growth", c(2:3, 5:7)),
  comp_deficit = paste0("needs_comp_deficit", c(1:2, 7:9)),
  rel_growth   = paste0("needs_rel_growth", c(1:3, 5, 7))
)
# Row-wise mean per scale; na.rm = TRUE tolerates residual missing values.
# Columns are appended in the same order as the original mutate() calls.
for (scale_name in names(cfa_item_sets)) {
  data[[scale_name]] <- rowMeans(data[cfa_item_sets[[scale_name]]],
                                 na.rm = TRUE)
}
# Think about using psych::scoreItems...
# data_test <- data %>%
# mutate(aut_growth = scoreItems(
# items = pick(paste0("needs_aut_growth", c(1:2, 4:5, 8)))))
# Get mean of rel.deficit
# Only items identified through CFA; na.rm = TRUE tolerates residual
# missing values.
data <- data %>%
mutate(rel_deficit = rowMeans(
pick(paste0("needs_rel_deficit", c(2:5, 7))),
na.rm = TRUE))##
## Three factors are required for identification -- general factor loadings set to be equal.
## Proceed with caution.
## Think about redoing the analysis with alternative values of the 'option' setting.
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.91
## G.6: 0.94
## Omega Hierarchical: 0.36
## Omega H asymptotic: 0.38
## Omega Total 0.94
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* h2 u2 p2
## P_H1 0.33 0.67 0.56 0.44 0.20
## P_O1 0.37 0.69 0.61 0.39 0.22
## P_H2 0.39 0.65 0.58 0.42 0.26
## P_O2 0.43 0.64 0.62 0.38 0.31
## P_H3 0.38 0.64 0.56 0.44 0.26
## P_H4 0.40 0.58 0.51 0.49 0.32
## P_O3 0.38 0.64 0.55 0.45 0.27
## P_H5 0.38 0.68 0.60 0.40 0.24
## P_O4 0.37 0.54 0.44 0.56 0.31
## P_H6 0.35 0.72 0.64 0.36 0.18
## P_O5 0.40 0.67 0.61 0.39 0.26
## P_O6 0.36 0.73 0.67 0.33 0.19
## P_P1 0.42 0.47 0.26 0.46 0.54 0.38
## P_P2 0.36 0.65 0.56 0.44 0.24
## P_P3 0.40 0.72 0.67 0.33 0.23
## P_P4 0.42 0.63 0.58 0.42 0.30
## P_P5 0.42 0.66 0.61 0.39 0.28
##
## With Sums of squares of:
## g F1* F2*
## 2.5 4.6 2.7
##
## general/max 0.55 max/min = 1.72
## mean percent general = 0.26 with sd = 0.05 and cv of 0.2
## Explained Common Variance of the general factor = 0.26
##
## The degrees of freedom are 103 and the fit is 0.89
## The number of observations was 979 with Chi Square = 867.05 with prob < 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041
## The root mean square of the residuals is 0.04
## The df corrected root mean square of the residuals is 0.04
## RMSEA index = 0.087 and the 10 % confidence intervals are 0.082 0.092
## BIC = 157.74
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 119 and the fit is 6.15
## The number of observations was 979 with Chi Square = 5971.11 with prob < 0
## The root mean square of the residuals is 0.31
## The df corrected root mean square of the residuals is 0.33
##
## RMSEA index = 0.224 and the 10 % confidence intervals are 0.219 0.229
## BIC = 5151.62
##
## Measures of factor score adequacy
## g F1* F2*
## Correlation of scores with factors 0.61 0.86 0.84
## Multiple R square of scores with factors 0.37 0.74 0.71
## Minimum correlation of factor score estimates -0.25 0.48 0.42
##
## Total, General and Subset omega for each subset
## g F1* F2*
## Omega total for total scores and subscales 0.94 0.93 0.89
## Omega general for total scores and subscales 0.36 0.25 0.23
## Omega group for total scores and subscales 0.55 0.69 0.66
## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.92
## G.6: 0.92
## Omega Hierarchical: 0.92
## Omega H asymptotic: 1
## Omega Total 0.92
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* h2 u2 p2
## WB_1 0.88 0.78 0.22 1
## WB_2 0.90 0.81 0.19 1
## WB_3 0.80 0.64 0.36 1
## WB_4 0.86 0.74 0.26 1
##
## With Sums of squares of:
## g F1*
## 3 0
##
## general/max Inf max/min = NaN
## mean percent general = 1 with sd = 0 and cv of 0
## Explained Common Variance of the general factor = 1
##
## The degrees of freedom are 2 and the fit is 0.38
## The number of observations was 979 with Chi Square = 366.73 with prob < 0.000000000000000000000000000000000000000000000000000000000000000000000000000000023
## The root mean square of the residuals is 0.07
## The df corrected root mean square of the residuals is 0.12
## RMSEA index = 0.432 and the 10 % confidence intervals are 0.395 0.47
## BIC = 352.96
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 2 and the fit is 0.38
## The number of observations was 979 with Chi Square = 366.73 with prob < 0.000000000000000000000000000000000000000000000000000000000000000000000000000000023
## The root mean square of the residuals is 0.07
## The df corrected root mean square of the residuals is 0.12
##
## RMSEA index = 0.432 and the 10 % confidence intervals are 0.395 0.47
## BIC = 352.96
##
## Measures of factor score adequacy
## g F1*
## Correlation of scores with factors 0.96 0
## Multiple R square of scores with factors 0.92 0
## Minimum correlation of factor score estimates 0.85 -1
##
## Total, General and Subset omega for each subset
## g F1*
## Omega total for total scores and subscales 0.92 0.92
## Omega general for total scores and subscales 0.92 0.92
## Omega group for total scores and subscales 0.00 0.00
## [1] "GMS_ID1" "GMS_INX2" "GMS_EXT3" "GMS_ID4" "GMS_INX5" "GMS_TRO6"
## [7] "GMS_INT7" "GMS_AMO8" "GMS_INX9" "GMS_EXT10" "GMS_ID11" "GMS_TRO12"
## [13] "GMS_AMO13" "GMS_EXT14" "GMS_AMO15" "GMS_TRO16" "GMS_INT17" "GMS_INT18"
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.91
## G.6: 0.94
## Omega Hierarchical: 0.77
## Omega H asymptotic: 0.81
## Omega Total 0.95
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* F4* F5* F6* h2 u2 p2
## GMS_ID1 0.62 0.28 0.60 0.40 0.63
## GMS_INX2 0.54 0.63 0.69 0.31 0.43
## GMS_EXT3 0.55 0.60 0.70 0.30 0.43
## GMS_ID4 0.56 0.41 0.54 0.46 0.58
## GMS_INX5 0.55 0.64 0.73 0.27 0.42
## GMS_TRO6 0.52 0.67 0.74 0.26 0.36
## GMS_INT7 0.67 0.54 0.72 0.28 0.62
## GMS_AMO8 0.36 0.71 0.61 0.39 0.21
## GMS_INX9 0.56 0.42 0.58 0.74
## GMS_EXT10 0.57 0.56 0.60 0.40 0.53
## GMS_ID11 0.58 0.73 0.86 0.14 0.39
## GMS_TRO12 0.52 0.58 0.67 0.33 0.41
## GMS_AMO13 0.41 0.68 0.66 0.34 0.25
## GMS_EXT14 0.54 0.24 0.44 0.61 0.39 0.49
## GMS_AMO15 0.43 0.65 0.67 0.33 0.27
## GMS_TRO16 0.52 0.64 0.66 0.34 0.41
## GMS_INT17 0.69 0.51 0.73 0.27 0.65
## GMS_INT18 0.66 0.46 0.68 0.32 0.63
##
## With Sums of squares of:
## g F1* F2* F3* F4* F5* F6*
## 5.50 0.88 1.48 1.22 0.89 0.76 0.89
##
## general/max 3.71 max/min = 1.94
## mean percent general = 0.47 with sd = 0.15 and cv of 0.32
## Explained Common Variance of the general factor = 0.47
##
## The degrees of freedom are 60 and the fit is 0.09
## The number of observations was 979 with Chi Square = 87.39 with prob < 0.012
## The root mean square of the residuals is 0.01
## The df corrected root mean square of the residuals is 0.01
## RMSEA index = 0.022 and the 10 % confidence intervals are 0.01 0.031
## BIC = -325.8
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 135 and the fit is 4.7
## The number of observations was 979 with Chi Square = 4564.05 with prob < 0
## The root mean square of the residuals is 0.17
## The df corrected root mean square of the residuals is 0.18
##
## RMSEA index = 0.183 and the 10 % confidence intervals are 0.179 0.188
## BIC = 3634.37
##
## Measures of factor score adequacy
## g F1* F2* F3* F4* F5*
## Correlation of scores with factors 0.89 0.83 0.98 0.91 0.86 0.89
## Multiple R square of scores with factors 0.79 0.69 0.95 0.82 0.75 0.80
## Minimum correlation of factor score estimates 0.57 0.38 0.91 0.64 0.49 0.59
## F6*
## Correlation of scores with factors 0.93
## Multiple R square of scores with factors 0.87
## Minimum correlation of factor score estimates 0.74
##
## Total, General and Subset omega for each subset
## g F1* F2* F3* F4* F5*
## Omega total for total scores and subscales 0.95 0.85 0.82 0.85 0.80 0.79
## Omega general for total scores and subscales 0.77 0.58 0.21 0.34 0.41 0.39
## Omega group for total scores and subscales 0.13 0.27 0.61 0.50 0.38 0.40
## F6*
## Omega total for total scores and subscales 0.77
## Omega general for total scores and subscales 0.43
## Omega group for total scores and subscales 0.33
# Global Motivation Scale (GMS) subscale composites (NA-tolerant row means).
# Item selectors are anchored with matches("^...[0-9]") instead of
# starts_with(): tidyselect prefix helpers are case-insensitive by default,
# so starts_with("GMS_INT") would also capture the GMS_intrinsic and
# GMS_introjected composite columns created earlier in this same mutate(),
# contaminating GMS_integrated. Requiring a trailing digit restricts each
# mean to the raw items (e.g. GMS_INT7, GMS_INT17, GMS_INT18).
# NOTE(review): mapping INX -> intrinsic and INT -> integrated mirrors the
# original code; verify the item codebook agrees.
data <- data %>%
mutate(GMS_identified = rowMeans(
pick(matches("^GMS_ID[0-9]")),
na.rm = TRUE),
GMS_intrinsic = rowMeans(
pick(matches("^GMS_INX[0-9]")),
na.rm = TRUE),
GMS_external = rowMeans(
pick(matches("^GMS_EXT[0-9]")),
na.rm = TRUE),
GMS_introjected = rowMeans(
pick(matches("^GMS_TRO[0-9]")),
na.rm = TRUE),
GMS_integrated = rowMeans(
pick(matches("^GMS_INT[0-9]")),
na.rm = TRUE),
GMS_amotivation = rowMeans(
pick(matches("^GMS_AMO[0-9]")),
na.rm = TRUE))## [1] "N1_SA1" "N1_FA2" "N1_SR3" "N1_FR4" "N1_SC5" "N1_FC6" "N1_SA7"
## [8] "N1_FA8" "N1_SR9" "N1_FR10" "N1_SC11" "N1_FC12" "N2_SA13" "N2_FA14"
## [15] "N2_SR15" "N2_FR16" "N2_SC17" "N2_FC18" "N2_SA19" "N2_FA20" "N2_SR21"
## [22] "N2_FR22" "N2_SC23" "N2_FC24"
## [1] "N1_FA2" "N1_FR4" "N1_FC6" "N1_FA8" "N1_FR10" "N1_FC12" "N2_FA14"
## [8] "N2_FR16" "N2_FC18" "N2_FA20" "N2_FR22" "N2_FC24"
# Get alpha & omega
# Reliability (alpha, G.6, omega hierarchical/total) for the 24 GNSFS items
# via psych::omega with 6 group factors (3 needs x satisfaction/frustration).
# NOTE(review): starts_with("N") is case-insensitive and matches any column
# beginning with n/N; the contains() filter is what keeps non-GNSFS columns
# (e.g. the renamed needs_* items) out -- fragile, consider matches("^N[12]_").
data %>%
select(starts_with("N") & contains(c( "_S", "_F"))) %>%
omega(nfactors = 6)## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.94
## G.6: 0.96
## Omega Hierarchical: 0.77
## Omega H asymptotic: 0.8
## Omega Total 0.97
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* F4* F5* F6* h2 u2 p2
## N1_SA1- 0.53 -0.25 -0.27 0.54 0.46 0.52
## N1_FA2 0.58 0.50 0.61 0.39 0.54
## N1_SR3- 0.54 -0.59 0.68 0.32 0.43
## N1_FR4 0.53 0.49 0.22 0.61 0.39 0.46
## N1_SC5- 0.59 -0.58 0.70 0.30 0.49
## N1_FC6 0.63 0.40 0.70 0.30 0.57
## N1_SA7- 0.55 -0.22 -0.41 0.63 0.37 0.47
## N1_FA8 0.62 0.23 0.44 0.67 0.33 0.58
## N1_SR9- 0.52 -0.66 0.74 0.26 0.37
## N1_FR10 0.49 0.65 0.69 0.31 0.35
## N1_SC11- 0.60 -0.56 0.69 0.31 0.52
## N1_FC12 0.65 0.50 0.73 0.27 0.58
## N2_SA13- 0.53 -0.63 0.69 0.31 0.40
## N2_FA14 0.57 0.49 0.61 0.39 0.53
## N2_SR15- 0.53 -0.66 0.73 0.27 0.39
## N2_FR16 0.53 0.60 0.70 0.30 0.40
## N2_SC17- 0.59 -0.39 0.66 0.34 0.53
## N2_FC18 0.67 0.47 0.76 0.24 0.60
## N2_SA19- 0.54 -0.57 0.68 0.32 0.44
## N2_FA20 0.65 0.56 0.74 0.26 0.58
## N2_SR21- 0.50 -0.61 0.65 0.35 0.39
## N2_FR22 0.54 0.36 -0.23 0.61 0.39 0.47
## N2_SC23- 0.53 -0.44 0.56 0.44 0.50
## N2_FC24 0.65 0.51 0.69 0.31 0.61
##
## With Sums of squares of:
## g F1* F2* F3* F4* F5* F6*
## 7.8 1.3 1.2 1.7 1.1 1.0 1.1
##
## general/max 4.54 max/min = 1.68
## mean percent general = 0.49 with sd = 0.08 and cv of 0.16
## Explained Common Variance of the general factor = 0.51
##
## The degrees of freedom are 147 and the fit is 0.33
## The number of observations was 979 with Chi Square = 317.34 with prob < 0.000000000000015
## The root mean square of the residuals is 0.01
## The df corrected root mean square of the residuals is 0.02
## RMSEA index = 0.034 and the 10 % confidence intervals are 0.029 0.04
## BIC = -694.98
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 252 and the fit is 7.74
## The number of observations was 979 with Chi Square = 7498.66 with prob < 0
## The root mean square of the residuals is 0.17
## The df corrected root mean square of the residuals is 0.18
##
## RMSEA index = 0.171 and the 10 % confidence intervals are 0.168 0.175
## BIC = 5763.26
##
## Measures of factor score adequacy
## g F1* F2* F3* F4* F5*
## Correlation of scores with factors 0.88 0.96 0.88 0.94 0.82 0.88
## Multiple R square of scores with factors 0.78 0.92 0.77 0.88 0.68 0.77
## Minimum correlation of factor score estimates 0.56 0.85 0.54 0.76 0.35 0.55
## F6*
## Correlation of scores with factors 0.94
## Multiple R square of scores with factors 0.89
## Minimum correlation of factor score estimates 0.78
##
## Total, General and Subset omega for each subset
## g F1* F2* F3* F4* F5*
## Omega total for total scores and subscales 0.97 0.76 0.79 0.88 0.84 0.82
## Omega general for total scores and subscales 0.77 0.38 0.45 0.36 0.50 0.54
## Omega group for total scores and subscales 0.11 0.38 0.33 0.52 0.34 0.28
## F6*
## Omega total for total scores and subscales 0.72
## Omega general for total scores and subscales 0.41
## Omega group for total scores and subscales 0.31
# GNSFS composites (NA-tolerant row means): global satisfaction/frustration
# plus the three need-specific subscales of each (A = autonomy,
# R = relatedness, C = competence). Selectors are anchored with
# matches("^N[12]_...") instead of starts_with("N") & contains():
# starts_with() is case-insensitive and matches every column beginning with
# n/N (including the renamed needs_* items), relying on the contains()
# filter to exclude them only by accident. The anchored regex restricts
# each mean to the N1_/N2_ raw items (e.g. N1_SA1 ... N2_FC24).
data <- data %>%
mutate(GNSFS_satisfaction = rowMeans(
pick(matches("^N[12]_S")),
na.rm = TRUE),
GNSFS_frustration = rowMeans(
pick(matches("^N[12]_F")),
na.rm = TRUE),
GNSFS_satisfaction_aut = rowMeans(
pick(matches("^N[12]_SA")),
na.rm = TRUE),
GNSFS_satisfaction_rel = rowMeans(
pick(matches("^N[12]_SR")),
na.rm = TRUE),
GNSFS_satisfaction_comp = rowMeans(
pick(matches("^N[12]_SC")),
na.rm = TRUE),
GNSFS_frustration_aut = rowMeans(
pick(matches("^N[12]_FA")),
na.rm = TRUE),
GNSFS_frustration_rel = rowMeans(
pick(matches("^N[12]_FR")),
na.rm = TRUE),
GNSFS_frustration_comp = rowMeans(
pick(matches("^N[12]_FC")),
na.rm = TRUE))## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.96
## G.6: 0.94
## Omega Hierarchical: 0.96
## Omega H asymptotic: 1
## Omega Total 0.96
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* h2 u2 p2
## MS_AF1 0.95 0.90 0.10 1
## MS_AF2 0.95 0.90 0.10 1
## MS_AF3 0.91 0.84 0.16 1
##
## With Sums of squares of:
## g F1*
## 2.6 0.0
##
## general/max Inf max/min = NaN
## mean percent general = 1 with sd = 0 and cv of 0
## Explained Common Variance of the general factor = 1
##
## The degrees of freedom are 0 and the fit is 0
## The number of observations was 979 with Chi Square = 0 with prob < NA
## The root mean square of the residuals is 0
## The df corrected root mean square of the residuals is NA
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 0 and the fit is 0
## The number of observations was 979 with Chi Square = 0 with prob < NA
## The root mean square of the residuals is 0
## The df corrected root mean square of the residuals is NA
##
## Measures of factor score adequacy
## g F1*
## Correlation of scores with factors 0.98 0
## Multiple R square of scores with factors 0.96 0
## Minimum correlation of factor score estimates 0.92 -1
##
## Total, General and Subset omega for each subset
## g F1*
## Omega total for total scores and subscales 0.96 0.96
## Omega general for total scores and subscales 0.96 0.96
## Omega group for total scores and subscales 0.00 0.00
## [1] "BF_E1r" "BF_C3r" "BF_N4r" "BF_O5r" "BF_A7r"
## [1] "BF_N4r" "BF_N9"
# Reverse-score the Big Five items flagged with a trailing "r"
# (rempsyc::nice_reverse on a 1-5 response scale); .names = "{col}"
# overwrites each item in place rather than creating new columns.
data <- data %>%
mutate(across(contains("BF_") & ends_with("r"),
~nice_reverse(.x, 5), .names = "{col}"))
# Get alpha & omega
# Reliability for the 10 BF items via psych::omega with 5 group factors
# (one per trait, 2 items each).
data %>%
select(starts_with("BF_")) %>%
omega(nfactors = 5)## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.69
## G.6: 0.74
## Omega Hierarchical: 0.5
## Omega H asymptotic: 0.6
## Omega Total 0.83
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* F4* F5* h2 u2 p2
## BF_E1r 0.24 0.47 -0.38 0.46 0.54 0.12
## BF_A2 0.25 0.24 0.19 0.81 0.32
## BF_C3r 0.48 0.88 1.00 0.00 0.23
## BF_N4r- 0.63 -0.38 0.22 0.62 0.38 0.64
## BF_O5r 0.20 0.94 0.93 0.07 0.04
## BF_E6 0.36 0.88 0.90 0.10 0.14
## BF_A7r 0.39 0.21 0.79 0.71
## BF_C8 0.30 0.31 0.39 0.36 0.64 0.25
## BF_N9- 0.70 -0.42 0.70 0.30 0.71
## BF_O10 0.40 0.24 0.22 0.78 0.03
##
## With Sums of squares of:
## g F1* F2* F3* F4* F5*
## 1.65 0.38 0.88 1.11 1.08 0.47
##
## general/max 1.48 max/min = 2.92
## mean percent general = 0.32 with sd = 0.27 and cv of 0.85
## Explained Common Variance of the general factor = 0.3
##
## The degrees of freedom are 5 and the fit is 0.08
## The number of observations was 979 with Chi Square = 78.39 with prob < 0.0000000000000018
## The root mean square of the residuals is 0.03
## The df corrected root mean square of the residuals is 0.08
## RMSEA index = 0.122 and the 10 % confidence intervals are 0.099 0.147
## BIC = 43.96
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 35 and the fit is 0.95
## The number of observations was 979 with Chi Square = 927.86 with prob < 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004
## The root mean square of the residuals is 0.12
## The df corrected root mean square of the residuals is 0.14
##
## RMSEA index = 0.161 and the 10 % confidence intervals are 0.153 0.171
## BIC = 686.83
##
## Measures of factor score adequacy
## g F1* F2* F3* F4* F5*
## Correlation of scores with factors 0.80 0.52 0.96 0.97 0.93 0.68
## Multiple R square of scores with factors 0.65 0.27 0.91 0.94 0.86 0.47
## Minimum correlation of factor score estimates 0.29 -0.46 0.82 0.88 0.72 -0.07
##
## Total, General and Subset omega for each subset
## g F1* F2* F3* F4* F5*
## Omega total for total scores and subscales 0.83 0.73 1.00 0.70 0.68 0.24
## Omega general for total scores and subscales 0.50 0.55 0.23 0.03 0.15 0.09
## Omega group for total scores and subscales 0.24 0.19 0.77 0.67 0.53 0.15
# Big Five trait composites (NA-tolerant row means of 2 items each).
# Selectors are anchored with matches("^BF_X[0-9]") instead of
# starts_with(): tidyselect prefixes are case-insensitive, so e.g.
# starts_with("BF_N") would also match the BF_neuroticism composite if the
# creation order inside this mutate() ever changed -- the original code was
# only correct by accident of ordering. The digit anchor restricts each
# mean to the raw items (reversed "...r" items included, e.g. BF_O5r).
# NOTE(review): "BF_agreableness" keeps the original (misspelled) column
# name because downstream code may reference it; standard spelling is
# "agreeableness".
data <- data %>%
mutate(BF_openness = rowMeans(
pick(matches("^BF_O[0-9]")),
na.rm = TRUE),
BF_conscientiousness = rowMeans(
pick(matches("^BF_C[0-9]")),
na.rm = TRUE),
BF_extroversion = rowMeans(
pick(matches("^BF_E[0-9]")),
na.rm = TRUE),
BF_agreableness = rowMeans(
pick(matches("^BF_A[0-9]")),
na.rm = TRUE),
BF_neuroticism = rowMeans(
pick(matches("^BF_N[0-9]")),
na.rm = TRUE))## [1] "Tem_Avo1" "Tem_App2" "Tem_Avo3" "Tem_App4" "Tem_App5" "Tem_Avo6"
## [7] "Tem_Avo7" "Tem_App8" "Tem_Avo9" "Tem_App10" "Tem_App11" "Tem_Avo12"
## [1] "Tem_App2" "Tem_App4" "Tem_App5" "Tem_App8" "Tem_App10" "Tem_App11"
##
## Three factors are required for identification -- general factor loadings set to be equal.
## Proceed with caution.
## Think about redoing the analysis with alternative values of the 'option' setting.
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.8
## G.6: 0.87
## Omega Hierarchical: 0.03
## Omega H asymptotic: 0.04
## Omega Total 0.89
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* h2 u2 p2
## Tem_Avo1 0.83 0.70 0.30 0.02
## Tem_App2- -0.77 0.61 0.39 0.02
## Tem_Avo3 0.80 0.65 0.35 0.02
## Tem_App4- -0.81 0.67 0.33 0.02
## Tem_App5- -0.59 0.35 0.65 0.02
## Tem_Avo6 0.87 0.77 0.23 0.02
## Tem_Avo7 0.72 0.54 0.46 0.01
## Tem_App8- -0.67 0.48 0.52 0.03
## Tem_Avo9 0.66 0.45 0.55 0.01
## Tem_App10- -0.65 0.44 0.56 0.01
## Tem_App11- -0.76 0.59 0.41 0.02
## Tem_Avo12 0.73 0.55 0.45 0.02
##
## With Sums of squares of:
## g F1* F2*
## 0.13 3.61 3.07
##
## general/max 0.04 max/min = 1.18
## mean percent general = 0.02 with sd = 0.01 and cv of 0.31
## Explained Common Variance of the general factor = 0.02
##
## The degrees of freedom are 43 and the fit is 0.18
## The number of observations was 979 with Chi Square = 177.75 with prob < 0.0000000000000000026
## The root mean square of the residuals is 0.02
## The df corrected root mean square of the residuals is 0.03
## RMSEA index = 0.057 and the 10 % confidence intervals are 0.048 0.065
## BIC = -118.37
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 54 and the fit is 5.84
## The number of observations was 979 with Chi Square = 5677.46 with prob < 0
## The root mean square of the residuals is 0.38
## The df corrected root mean square of the residuals is 0.42
##
## RMSEA index = 0.326 and the 10 % confidence intervals are 0.319 0.334
## BIC = 5305.59
##
## Measures of factor score adequacy
## g F1* F2*
## Correlation of scores with factors 0.18 0.95 0.93
## Multiple R square of scores with factors 0.03 0.90 0.86
## Minimum correlation of factor score estimates -0.93 0.80 0.73
##
## Total, General and Subset omega for each subset
## g F1* F2*
## Omega total for total scores and subscales 0.89 0.90 0.87
## Omega general for total scores and subscales 0.03 0.02 0.02
## Omega group for total scores and subscales 0.87 0.89 0.85
## [1] "MPO_MAp1" "MPO_MAv2" "MPO_PAp3" "MPO_PAv4" "MPO_MAp5" "MPO_MAv6"
## [7] "MPO_PAp7" "MPO_PAv8" "MPO_MAp9" "MPO_MAv10" "MPO_PAp11" "MPO_PAv12"
## [1] "MPO_PAp3" "MPO_PAp7" "MPO_PAp11"
##
## Three factors are required for identification -- general factor loadings set to be equal.
## Proceed with caution.
## Think about redoing the analysis with alternative values of the 'option' setting.
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.91
## G.6: 0.94
## Omega Hierarchical: 0.62
## Omega H asymptotic: 0.66
## Omega Total 0.93
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* h2 u2 p2
## MPO_MAp1 0.37 0.44 0.33 0.67 0.41
## MPO_MAv2 0.61 0.60 0.74 0.26 0.51
## MPO_PAp3 0.61 0.59 0.73 0.27 0.52
## MPO_PAv4 0.59 0.29 0.29 0.52 0.48 0.68
## MPO_MAp5 0.36 0.40 0.29 0.71 0.45
## MPO_MAv6 0.60 0.59 0.70 0.30 0.51
## MPO_PAp7 0.59 0.57 0.67 0.33 0.52
## MPO_PAv8 0.62 0.33 0.27 0.57 0.43 0.68
## MPO_MAp9 0.46 0.51 0.47 0.53 0.45
## MPO_MAv10 0.62 0.61 0.75 0.25 0.51
## MPO_PAp11 0.60 0.58 0.69 0.31 0.52
## MPO_PAv12 0.63 0.40 0.21 0.60 0.40 0.66
##
## With Sums of squares of:
## g F1* F2*
## 3.8 2.0 1.3
##
## general/max 1.94 max/min = 1.52
## mean percent general = 0.54 with sd = 0.09 and cv of 0.17
## Explained Common Variance of the general factor = 0.54
##
## The degrees of freedom are 43 and the fit is 1.82
## The number of observations was 979 with Chi Square = 1766.44 with prob < 0
## The root mean square of the residuals is 0.1
## The df corrected root mean square of the residuals is 0.12
## RMSEA index = 0.202 and the 10 % confidence intervals are 0.194 0.211
## BIC = 1470.32
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 54 and the fit is 4.03
## The number of observations was 979 with Chi Square = 3921.67 with prob < 0
## The root mean square of the residuals is 0.21
## The df corrected root mean square of the residuals is 0.23
##
## RMSEA index = 0.27 and the 10 % confidence intervals are 0.263 0.278
## BIC = 3549.8
##
## Measures of factor score adequacy
## g F1* F2*
## Correlation of scores with factors 0.80 0.75 0.74
## Multiple R square of scores with factors 0.64 0.56 0.55
## Minimum correlation of factor score estimates 0.28 0.11 0.10
##
## Total, General and Subset omega for each subset
## g F1* F2*
## Omega total for total scores and subscales 0.93 0.9 0.89
## Omega general for total scores and subscales 0.62 0.5 0.51
## Omega group for total scores and subscales 0.26 0.4 0.38
# MPOS 2x2 achievement-goal composites (NA-tolerant row means):
# mastery/performance x approach/avoidance, 3 items each
# (MAp/MAv/PAp/PAv item codes). The "MPOS_" output prefix cannot collide
# with the "MPO_" item prefixes under case-insensitive starts_with(), so
# composites created earlier in this mutate() are not swept into later ones.
data <- data %>%
mutate(MPOS_mastery_approach = rowMeans(
pick(starts_with("MPO_MAp")),
na.rm = TRUE),
MPOS_mastery_avoidance = rowMeans(
pick(starts_with("MPO_MAv")),
na.rm = TRUE),
MPOS_performance_approach = rowMeans(
pick(starts_with("MPO_PAp")),
na.rm = TRUE),
MPOS_performance_avoidance = rowMeans(
pick(starts_with("MPO_PAv")),
na.rm = TRUE))## [1] "Pers_F1" "Pers_R2" "Pers_R3" "Pers_F4" "Pers_R5" "Pers_F6" "Pers_F7"
## [8] "Pers_R8"
## [1] "Pers_R2" "Pers_R3" "Pers_R5" "Pers_R8"
##
## Three factors are required for identification -- general factor loadings set to be equal.
## Proceed with caution.
## Think about redoing the analysis with alternative values of the 'option' setting.
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.76
## G.6: 0.81
## Omega Hierarchical: 0.22
## Omega H asymptotic: 0.26
## Omega Total 0.85
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* h2 u2 p2
## Pers_F1 0.28 0.78 0.69 0.31 0.12
## Pers_R2 0.31 0.74 0.64 0.36 0.15
## Pers_R3 0.27 0.74 0.64 0.36 0.12
## Pers_F4 0.31 0.68 0.56 0.44 0.17
## Pers_R5 0.28 0.63 0.48 0.52 0.17
## Pers_F6 0.33 0.61 0.52 0.48 0.21
## Pers_F7 0.54 0.34 0.66 0.11
## Pers_R8 0.30 0.65 0.52 0.48 0.17
##
## With Sums of squares of:
## g F1* F2*
## 0.67 1.97 1.74
##
## general/max 0.34 max/min = 1.13
## mean percent general = 0.15 with sd = 0.03 and cv of 0.22
## Explained Common Variance of the general factor = 0.15
##
## The degrees of freedom are 13 and the fit is 0.06
## The number of observations was 979 with Chi Square = 60.76 with prob < 0.000000038
## The root mean square of the residuals is 0.02
## The df corrected root mean square of the residuals is 0.03
## RMSEA index = 0.061 and the 10 % confidence intervals are 0.046 0.077
## BIC = -28.77
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 20 and the fit is 2.13
## The number of observations was 979 with Chi Square = 2071.91 with prob < 0
## The root mean square of the residuals is 0.3
## The df corrected root mean square of the residuals is 0.36
##
## RMSEA index = 0.324 and the 10 % confidence intervals are 0.312 0.336
## BIC = 1934.18
##
## Measures of factor score adequacy
## g F1* F2*
## Correlation of scores with factors 0.47 0.86 0.85
## Multiple R square of scores with factors 0.22 0.73 0.72
## Minimum correlation of factor score estimates -0.56 0.47 0.44
##
## Total, General and Subset omega for each subset
## g F1* F2*
## Omega total for total scores and subscales 0.85 0.84 0.81
## Omega general for total scores and subscales 0.22 0.13 0.13
## Omega group for total scores and subscales 0.60 0.71 0.68
## [1] "PGD_A1" "PGD_EM2" "PGD_PR3" "PGD_SA4" "PGD_PiL5" "PGD_A6"
## [7] "PGD_EM7" "PGD_PR8" "PGD_SA9" "PGD_PiL10" "PGD_ATT2"
## [1] "PGD_PiL5" "PGD_PiL10"
## Warning in GPFoblq(A, Tmat = Tmat, normalize = normalize, eps = eps, maxit =
## maxit, : convergence not obtained in GPFoblq. 1000 iterations used.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.92
## G.6: 0.93
## Omega Hierarchical: 0.9
## Omega H asymptotic: 0.95
## Omega Total 0.95
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* F4* F5* h2 u2 p2
## PGD_A1 0.75 0.32 0.68 0.32 0.82
## PGD_EM2 0.83 0.68 0.32 1.02
## PGD_PR3 0.76 0.63 0.98 0.02 0.60
## PGD_SA4 0.81 0.65 0.35 1.00
## PGD_PiL5 0.74 0.67 1.00 0.00 0.56
## PGD_A6 0.85 0.72 0.28 1.00
## PGD_EM7 0.78 0.66 0.34 0.92
## PGD_PR8 0.71 0.24 0.60 0.40 0.84
## PGD_SA9 0.85 0.77 0.23 0.95
## PGD_PiL10 0.79 0.24 0.68 0.32 0.91
## PGD_ATT2 0.06 0.94 0.00
##
## With Sums of squares of:
## g F1* F2* F3* F4* F5*
## 6.22 0.00 0.52 0.48 0.17 0.14
##
## general/max 12.07 max/min = Inf
## mean percent general = 0.78 with sd = 0.3 and cv of 0.39
## Explained Common Variance of the general factor = 0.83
##
## The degrees of freedom are 10 and the fit is 0.01
## The number of observations was 979 with Chi Square = 7.07 with prob < 0.72
## The root mean square of the residuals is 0
## The df corrected root mean square of the residuals is 0.01
## RMSEA index = 0 and the 10 % confidence intervals are 0 0.026
## BIC = -61.8
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 44 and the fit is 0.31
## The number of observations was 979 with Chi Square = 303.03 with prob < 0.00000000000000000000000000000000000000022
## The root mean square of the residuals is 0.04
## The df corrected root mean square of the residuals is 0.05
##
## RMSEA index = 0.078 and the 10 % confidence intervals are 0.069 0.086
## BIC = 0.02
##
## Measures of factor score adequacy
## g F1* F2* F3* F4* F5*
## Correlation of scores with factors 0.97 0 0.95 0.93 0.52 0.48
## Multiple R square of scores with factors 0.93 0 0.91 0.86 0.27 0.23
## Minimum correlation of factor score estimates 0.87 -1 0.82 0.73 -0.46 -0.53
##
## Total, General and Subset omega for each subset
## g F1* F2* F3* F4* F5*
## Omega total for total scores and subscales 0.95 NA 0.91 0.89 0.39 0.91
## Omega general for total scores and subscales 0.90 NA 0.67 0.75 0.27 0.90
## Omega group for total scores and subscales 0.03 NA 0.23 0.14 0.12 0.01
# PGDS subscale composites (NA-tolerant row means, 2 items each).
# FIX: PGDS_autonomy previously used starts_with("PGD_A"), which also swept
# in the attention-check item PGD_ATT2 along with PGD_A1/PGD_A6. All
# selectors are now anchored with matches() requiring a trailing digit
# after the full subscale code, so only raw items enter each mean
# (EM = environmental mastery, PR = positive relations, SA = self-
# acceptance, PiL = purpose in life).
data <- data %>%
mutate(PGDS_autonomy = rowMeans(
pick(matches("^PGD_A[0-9]")),
na.rm = TRUE),
PGDS_mastery = rowMeans(
pick(matches("^PGD_EM[0-9]")),
na.rm = TRUE),
PGDS_positive = rowMeans(
pick(matches("^PGD_PR[0-9]")),
na.rm = TRUE),
PGDS_acceptance = rowMeans(
pick(matches("^PGD_SA[0-9]")),
na.rm = TRUE),
PGDS_purpose = rowMeans(
pick(matches("^PGD_PiL[0-9]")),
na.rm = TRUE))Note: the 10-item version was used instead of the full-length version.
## [1] "SAc_EAE1" "SAc_AuDi2" "SAc_TR3" "SAc_EAE4" "SAc_AuDi5"
## [6] "SAc_ACC6r" "SAc_DUn7" "SAc_ACC8r" "SAc_AuDi9" "SAc_AuDi10"
## [11] "SAc_AuDi11" "SAc_Dun12" "SAc_TR13r" "SAc_ACC14r" "SAc_TR15"
## [1] "SAc_DUn7" "SAc_Dun12"
# Reverse-score the Self-Actualization items flagged with a trailing "r"
# (rempsyc::nice_reverse on a 1-4 response scale); .names = "{col}"
# overwrites each item in place rather than creating new columns.
data <- data %>%
mutate(across(starts_with("SAc_") & ends_with("r"),
~nice_reverse(.x, 4), .names = "{col}"))
# Get alpha & omega
# Reliability for the 15 SAc items via psych::omega with 5 group factors.
data %>%
select(starts_with("SAc_")) %>%
omega(nfactors = 5)## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.74
## G.6: 0.78
## Omega Hierarchical: 0.46
## Omega H asymptotic: 0.56
## Omega Total 0.82
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* F4* F5* h2 u2 p2
## SAc_EAE1- 0.29 -0.37 0.36 0.64 0.23
## SAc_AuDi2 0.47 -0.28 0.23 0.38 0.62 0.58
## SAc_TR3- -0.62 0.43 0.57 0.01
## SAc_EAE4- -0.23 -0.43 0.26 0.74 0.00
## SAc_AuDi5 0.64 0.58 0.77 0.23 0.53
## SAc_ACC6r- 0.43 -0.32 0.23 0.34 0.66 0.54
## SAc_DUn7- -0.36 -0.20 0.23 0.77 0.11
## SAc_ACC8r- 0.43 -0.68 0.66 0.34 0.28
## SAc_AuDi9 0.30 0.39 0.28 0.32 0.68 0.28
## SAc_AuDi10- 0.30 0.20 0.30 0.27 0.73 0.33
## SAc_AuDi11 0.40 0.47 0.39 0.61 0.40
## SAc_Dun12- 0.21 -0.67 0.49 0.51 0.09
## SAc_TR13r- 0.29 0.53 0.44 0.56 0.19
## SAc_ACC14r- 0.54 -0.62 0.70 0.30 0.43
## SAc_TR15- -0.43 -0.21 0.38 0.62 0.09
##
## With Sums of squares of:
## g F1* F2* F3* F4* F5*
## 1.92 1.17 0.83 0.90 0.92 0.52
##
## general/max 1.64 max/min = 2.23
## mean percent general = 0.27 with sd = 0.19 and cv of 0.71
## Explained Common Variance of the general factor = 0.31
##
## The degrees of freedom are 40 and the fit is 0.07
## The number of observations was 979 with Chi Square = 65.12 with prob < 0.0073
## The root mean square of the residuals is 0.01
## The df corrected root mean square of the residuals is 0.02
## RMSEA index = 0.025 and the 10 % confidence intervals are 0.013 0.036
## BIC = -210.34
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 90 and the fit is 1.67
## The number of observations was 979 with Chi Square = 1620.26 with prob < 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000055
## The root mean square of the residuals is 0.13
## The df corrected root mean square of the residuals is 0.14
##
## RMSEA index = 0.132 and the 10 % confidence intervals are 0.126 0.138
## BIC = 1000.47
##
## Measures of factor score adequacy
## g F1* F2* F3* F4* F5*
## Correlation of scores with factors 0.76 0.79 0.73 0.77 0.77 0.66
## Multiple R square of scores with factors 0.58 0.62 0.53 0.59 0.59 0.43
## Minimum correlation of factor score estimates 0.16 0.24 0.06 0.19 0.19 -0.13
##
## Total, General and Subset omega for each subset
## g F1* F2* F3* F4* F5*
## Omega total for total scores and subscales 0.82 0.77 0.6 0.45 0.51 0.63
## Omega general for total scores and subscales 0.46 0.38 0.2 0.03 0.06 0.34
## Omega group for total scores and subscales 0.21 0.39 0.4 0.42 0.45 0.29
# Compute the Self-Actualization Scale (SAS) total score and its five
# subscale scores as row means over the relevant item columns
# (missing items are ignored via na.rm = TRUE).
# Note: tidyselect's starts_with() matches by prefix and is
# case-insensitive by default, so "SAc_D" picks up both the "SAc_DUn"
# and "SAc_Dun" desirability items.
data <- data %>%
  mutate(
    # Total score: all SAS items
    SAS = rowMeans(pick(starts_with("SAc_")), na.rm = TRUE),
    # Emotions subscale: SAc_EAE* items
    SAS_emotions = rowMeans(pick(starts_with("SAc_E")), na.rm = TRUE),
    # Autonomy subscale: SAc_AuDi* items only.
    # BUG FIX: the original used starts_with("SAc_A"), which also matched
    # the acceptance items (SAc_ACC6r, SAc_ACC8r, SAc_ACC14r), so the
    # autonomy score was contaminated by acceptance items.
    SAS_autonomy = rowMeans(pick(starts_with("SAc_Au")), na.rm = TRUE),
    # Trust subscale: SAc_TR* items
    SAS_trust = rowMeans(pick(starts_with("SAc_T")), na.rm = TRUE),
    # Acceptance subscale: SAc_ACC* items
    SAS_acceptance = rowMeans(pick(starts_with("SAc_AC")), na.rm = TRUE),
    # Desirability subscale: SAc_DUn*/SAc_Dun* items
    SAS_desirability = rowMeans(pick(starts_with("SAc_D")), na.rm = TRUE)
  )The first four factors can be related to the following important aspects of the functioning of the psychologically healthy or self-actualizing person: (a) autonomy or self-direction, (b) self-acceptance and self-esteem, (c) acceptance of emotions and freedom of expression of emotions, and (d) trust and responsibility in interpersonal relations. These four descriptions were agreed upon by at least two blind judges. The fifth factor is not easily interpreted but appears to be related to the ability to deal with undesirable aspects of life rather than avoiding them. Other judges felt it contained elements of social desirability and freedom of emotional expression.
## [1] "GiL_1" "GiL_2" "GiL_3" "GiL_4" "GiL_5" "GiL_6"
## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.96
## G.6: 0.95
## Omega Hierarchical: 0.96
## Omega H asymptotic: 1
## Omega Total 0.96
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* h2 u2 p2
## GiL_1 0.88 0.77 0.23 1
## GiL_2 0.90 0.81 0.19 1
## GiL_3 0.86 0.74 0.26 1
## GiL_4 0.89 0.79 0.21 1
## GiL_5 0.89 0.79 0.21 1
## GiL_6 0.91 0.83 0.17 1
##
## With Sums of squares of:
## g F1*
## 4.7 0.0
##
## general/max Inf max/min = NaN
## mean percent general = 1 with sd = 0 and cv of 0
## Explained Common Variance of the general factor = 1
##
## The degrees of freedom are 9 and the fit is 0.03
## The number of observations was 979 with Chi Square = 25.06 with prob < 0.0029
## The root mean square of the residuals is 0.01
## The df corrected root mean square of the residuals is 0.01
## RMSEA index = 0.043 and the 10 % confidence intervals are 0.023 0.063
## BIC = -36.92
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 9 and the fit is 0.03
## The number of observations was 979 with Chi Square = 25.06 with prob < 0.0029
## The root mean square of the residuals is 0.01
## The df corrected root mean square of the residuals is 0.01
##
## RMSEA index = 0.043 and the 10 % confidence intervals are 0.023 0.063
## BIC = -36.92
##
## Measures of factor score adequacy
## g F1*
## Correlation of scores with factors 0.98 0
## Multiple R square of scores with factors 0.96 0
## Minimum correlation of factor score estimates 0.92 -1
##
## Total, General and Subset omega for each subset
## g F1*
## Omega total for total scores and subscales 0.96 0.96
## Omega general for total scores and subscales 0.96 0.96
## Omega group for total scores and subscales 0.00 0.00
## [1] "GMI_Ref1" "GMI_Ref2" "GMI_Exp3" "GMI_Ref4" "GMI_Exp5" "GMI_Exp6" "GMI_Ref7"
## [8] "GMI_Ref8"
## [1] "GMI_Exp3" "GMI_Exp5" "GMI_Exp6"
##
## Three factors are required for identification -- general factor loadings set to be equal.
## Proceed with caution.
## Think about redoing the analysis with alternative values of the 'option' setting.
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.89
## G.6: 0.89
## Omega Hierarchical: 0.66
## Omega H asymptotic: 0.73
## Omega Total 0.91
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* h2 u2 p2
## GMI_Ref1 0.51 0.46 0.47 0.53 0.55
## GMI_Ref2 0.62 0.46 0.59 0.41 0.64
## GMI_Exp3 0.62 0.31 0.52 0.48 0.74
## GMI_Ref4 0.57 0.49 0.57 0.43 0.57
## GMI_Exp5 0.65 0.22 0.31 0.57 0.43 0.74
## GMI_Exp6 0.68 0.59 0.82 0.18 0.58
## GMI_Ref7 0.67 0.41 0.64 0.36 0.71
## GMI_Ref8 0.53 0.48 0.52 0.48 0.55
##
## With Sums of squares of:
## g F1* F2*
## 2.97 1.14 0.57
##
## general/max 2.6 max/min = 2.02
## mean percent general = 0.64 with sd = 0.08 and cv of 0.13
## Explained Common Variance of the general factor = 0.63
##
## The degrees of freedom are 13 and the fit is 0.03
## The number of observations was 979 with Chi Square = 30.97 with prob < 0.0034
## The root mean square of the residuals is 0.01
## The df corrected root mean square of the residuals is 0.02
## RMSEA index = 0.038 and the 10 % confidence intervals are 0.021 0.055
## BIC = -58.55
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 20 and the fit is 0.63
## The number of observations was 979 with Chi Square = 611.79 with prob < 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000094
## The root mean square of the residuals is 0.15
## The df corrected root mean square of the residuals is 0.18
##
## RMSEA index = 0.174 and the 10 % confidence intervals are 0.162 0.186
## BIC = 474.06
##
## Measures of factor score adequacy
## g F1* F2*
## Correlation of scores with factors 0.83 0.68 0.67
## Multiple R square of scores with factors 0.69 0.46 0.45
## Minimum correlation of factor score estimates 0.37 -0.08 -0.09
##
## Total, General and Subset omega for each subset
## g F1* F2*
## Omega total for total scores and subscales 0.91 0.86 0.81
## Omega general for total scores and subscales 0.66 0.53 0.58
## Omega group for total scores and subscales 0.19 0.33 0.22
## [1] "Gr_1" "Gr_2" "Gr_3" "Gr_4" "Gr_5" "Gr_6"
## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.89
## G.6: 0.87
## Omega Hierarchical: 0.89
## Omega H asymptotic: 1
## Omega Total 0.89
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* h2 u2 p2
## Gr_1 0.78 0.61 0.39 1
## Gr_2 0.70 0.49 0.51 1
## Gr_3 0.83 0.69 0.31 1
## Gr_4 0.71 0.50 0.50 1
## Gr_5 0.82 0.67 0.33 1
## Gr_6 0.71 0.51 0.49 1
##
## With Sums of squares of:
## g F1*
## 3.5 0.0
##
## general/max Inf max/min = NaN
## mean percent general = 1 with sd = 0 and cv of 0
## Explained Common Variance of the general factor = 1
##
## The degrees of freedom are 9 and the fit is 0.05
## The number of observations was 979 with Chi Square = 47.13 with prob < 0.00000037
## The root mean square of the residuals is 0.02
## The df corrected root mean square of the residuals is 0.03
## RMSEA index = 0.066 and the 10 % confidence intervals are 0.048 0.085
## BIC = -14.85
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 9 and the fit is 0.05
## The number of observations was 979 with Chi Square = 47.13 with prob < 0.00000037
## The root mean square of the residuals is 0.02
## The df corrected root mean square of the residuals is 0.03
##
## RMSEA index = 0.066 and the 10 % confidence intervals are 0.048 0.085
## BIC = -14.85
##
## Measures of factor score adequacy
## g F1*
## Correlation of scores with factors 0.95 0
## Multiple R square of scores with factors 0.90 0
## Minimum correlation of factor score estimates 0.79 -1
##
## Total, General and Subset omega for each subset
## g F1*
## Omega total for total scores and subscales 0.89 0.89
## Omega general for total scores and subscales 0.89 0.89
## Omega group for total scores and subscales 0.00 0.00
## [1] "PEQ_Au1" "PEQ_No2" "PEQ_Au3r" "PEQ_No4" "PEQ_Au5r" "PEQ_No6r"
## [7] "PEQ_Au7r" "PEQ_No8" "PEQ_Au9r" "PEQ_No10" "PEQ_ATT3"
## [1] "PEQ_No2" "PEQ_No4" "PEQ_No6r" "PEQ_No8" "PEQ_No10"
# Reverse-code the reverse-keyed PEQ items (column names ending in "r")
# on a 7-point scale. `.names = "{col}"` overwrites the original columns
# in place rather than creating new ones.
data <- data %>%
mutate(across(contains("PEQ_") & ends_with("r"),
~nice_reverse(.x, 7), .names = "{col}"))
# Get alpha & omega
# Reliability (alpha/omega) for the PEQ items, excluding the attention-check
# item (ATT), with a two-factor solution (the Au and No item groups).
data %>%
select(starts_with("PEQ_"), -contains("ATT")) %>%
omega(nfactors = 2)##
## Three factors are required for identification -- general factor loadings set to be equal.
## Proceed with caution.
## Think about redoing the analysis with alternative values of the 'option' setting.
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.79
## G.6: 0.88
## Omega Hierarchical: 0.13
## Omega H asymptotic: 0.14
## Omega Total 0.89
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* h2 u2 p2
## PEQ_Au1 0.58 0.22 0.40 0.60 0.03
## PEQ_No2 0.23 0.83 0.74 0.26 0.07
## PEQ_Au3r- 0.24 -0.83 0.75 0.25 0.08
## PEQ_No4 0.25 0.84 0.77 0.23 0.08
## PEQ_Au5r- 0.25 -0.81 0.72 0.28 0.09
## PEQ_No6r- -0.20 0.04 0.96 0.09
## PEQ_Au7r- 0.22 -0.79 0.67 0.33 0.07
## PEQ_No8 0.24 0.81 0.72 0.28 0.08
## PEQ_Au9r- 0.23 -0.78 0.67 0.33 0.08
## PEQ_No10 0.25 0.85 0.78 0.22 0.08
##
## With Sums of squares of:
## g F1* F2*
## 0.48 3.10 2.68
##
## general/max 0.15 max/min = 1.16
## mean percent general = 0.07 with sd = 0.02 and cv of 0.23
## Explained Common Variance of the general factor = 0.08
##
## The degrees of freedom are 26 and the fit is 0.14
## The number of observations was 979 with Chi Square = 138.77 with prob < 0.000000000000000023
## The root mean square of the residuals is 0.03
## The df corrected root mean square of the residuals is 0.04
## RMSEA index = 0.067 and the 10 % confidence intervals are 0.056 0.078
## BIC = -40.28
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 35 and the fit is 5.3
## The number of observations was 979 with Chi Square = 5153.29 with prob < 0
## The root mean square of the residuals is 0.38
## The df corrected root mean square of the residuals is 0.43
##
## RMSEA index = 0.386 and the 10 % confidence intervals are 0.378 0.396
## BIC = 4912.26
##
## Measures of factor score adequacy
## g F1* F2*
## Correlation of scores with factors 0.36 0.93 0.92
## Multiple R square of scores with factors 0.13 0.86 0.84
## Minimum correlation of factor score estimates -0.74 0.72 0.68
##
## Total, General and Subset omega for each subset
## g F1* F2*
## Omega total for total scores and subscales 0.89 0.91 0.85
## Omega general for total scores and subscales 0.13 0.07 0.07
## Omega group for total scores and subscales 0.78 0.84 0.78
## [1] "PGI_1" "PGI_2" "PGI_3" "PGI_4" "PGI_5" "PGI_6" "PGI_7" "PGI_8" "PGI_9"
## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.94
## G.6: 0.93
## Omega Hierarchical: 0.94
## Omega H asymptotic: 1
## Omega Total 0.94
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* h2 u2 p2
## PGI_1 0.83 0.69 0.31 1
## PGI_2 0.77 0.60 0.40 1
## PGI_3 0.78 0.61 0.39 1
## PGI_4 0.71 0.51 0.49 1
## PGI_5 0.82 0.67 0.33 1
## PGI_6 0.83 0.68 0.32 1
## PGI_7 0.81 0.66 0.34 1
## PGI_8 0.73 0.53 0.47 1
## PGI_9 0.79 0.63 0.37 1
##
## With Sums of squares of:
## g F1*
## 5.6 0.0
##
## general/max Inf max/min = NaN
## mean percent general = 1 with sd = 0 and cv of 0
## Explained Common Variance of the general factor = 1
##
## The degrees of freedom are 27 and the fit is 0.2
## The number of observations was 979 with Chi Square = 196.26 with prob < 0.0000000000000000000000000013
## The root mean square of the residuals is 0.03
## The df corrected root mean square of the residuals is 0.03
## RMSEA index = 0.08 and the 10 % confidence intervals are 0.07 0.091
## BIC = 10.32
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 27 and the fit is 0.2
## The number of observations was 979 with Chi Square = 196.26 with prob < 0.0000000000000000000000000013
## The root mean square of the residuals is 0.03
## The df corrected root mean square of the residuals is 0.03
##
## RMSEA index = 0.08 and the 10 % confidence intervals are 0.07 0.091
## BIC = 10.32
##
## Measures of factor score adequacy
## g F1*
## Correlation of scores with factors 0.97 0
## Multiple R square of scores with factors 0.94 0
## Minimum correlation of factor score estimates 0.88 -1
##
## Total, General and Subset omega for each subset
## g F1*
## Omega total for total scores and subscales 0.94 0.94
## Omega general for total scores and subscales 0.94 0.94
## Omega group for total scores and subscales 0.00 0.00
In this section, we: (a) test assumptions of normality, (b) transform variables violating assumptions, (c) test assumptions of homoscedasticity, and (d) identify and winsorize outliers.
# Check normality of each study variable in col.list, one panel per
# variable, with a Shapiro-Wilk test and a histogram alongside the
# default density/QQ plots.
lapply(col.list, function(x)
nice_normality(data,
variable = x,
title = x,
shapiro = TRUE,
histogram = TRUE))## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
##
## [[27]]
##
## [[28]]
##
## [[29]]
##
## [[30]]
##
## [[31]]
##
## [[32]]
##
## [[33]]
##
## [[34]]
##
## [[35]]
##
## [[36]]
##
## [[37]]
##
## [[38]]
##
## [[39]]
##
## [[40]]
##
## [[41]]
##
## [[42]]
##
## [[43]]
##
## [[44]]
##
## [[45]]
##
## [[46]]
##
## [[47]]
##
## [[48]]
##
## [[49]]
##
## [[50]]
##
## [[51]]
##
## [[52]]
##
## [[53]]
##
## [[54]]
##
## [[55]]
##
## [[56]]
##
## [[57]]
Several variables are clearly skewed. Let’s apply transformations. But first, let’s deal with the working memory task, SOPT (Self-Ordered Pointing Task). It is clearly problematic.
The function below transforms variables according to the best
possible transformation (via the bestNormalize package),
and also standardizes the variables.
# Fit the best normalizing transformation for one variable (via the
# bestNormalize package), print which transformation was chosen, and
# return the transformed values. Intended to be called inside
# mutate(across(...)): cur_column() only works within that dplyr context.
# The chosen transformation's class name is stashed in a "transform"
# attribute on the result, for labelling plots later on.
predict_bestNormalize <- function(var) {
  fit <- bestNormalize(var, standardize = FALSE, allow_orderNorm = FALSE)
  print(cur_column())
  print(fit$chosen_transform)
  cat("\n")
  out <- predict(fit)
  # NOTE(review): attributes(out) appears to be NULL at this point, so the
  # c() reduces to just the chosen transformation's first class name
  # (e.g. "boxcox", "yeojohnson", "no_transform") -- confirm.
  attr(out, "transform") <- c(attributes(out), attributes(fit$chosen_transform)$class[1])
  out
}
# Seed for reproducibility: bestNormalize's selection of the best
# transformation involves resampling (repeated cross-validation).
set.seed(42)
# Apply the best normalizing transformation to every variable in col.list,
# storing results in new columns suffixed with ".t" (originals are kept).
data <- data %>%
mutate(across(all_of(col.list),
predict_bestNormalize,
.names = "{.col}.t"))## [1] "aut_growth"
## I(x) Transformation with 979 nonmissing obs.
##
## [1] "aut_deficit"
## I(x) Transformation with 979 nonmissing obs.
##
## [1] "comp_growth"
## Non-Standardized Yeo-Johnson Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 2.032843
## - mean (before standardization) = 21.08699
## - sd (before standardization) = 7.407054
##
## [1] "comp_deficit"
## Non-Standardized Yeo-Johnson Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 2.072816
## - mean (before standardization) = 22.95349
## - sd (before standardization) = 8.406742
##
## [1] "rel_growth"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.385137
## - mean (before standardization) = 5.281719
## - sd (before standardization) = 2.207113
##
## [1] "rel_deficit"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.028973
## - mean (before standardization) = 3.053999
## - sd (before standardization) = 1.460626
##
## [1] "OP"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 0.2347702
## - mean (before standardization) = 1.136587
## - sd (before standardization) = 0.6989991
##
## [1] "HP"
## I(x) Transformation with 979 nonmissing obs.
##
## [1] "ofis.wellbeing"
## Non-Standardized Yeo-Johnson Transformation with 972 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.341988
## - mean (before standardization) = 6.685548
## - sd (before standardization) = 3.024205
##
## [1] "GMS_identified"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.438509
## - mean (before standardization) = 6.507478
## - sd (before standardization) = 2.414665
##
## [1] "GMS_intrinsic"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.397813
## - mean (before standardization) = 6.064643
## - sd (before standardization) = 2.246057
##
## [1] "GMS_external"
## I(x) Transformation with 979 nonmissing obs.
##
## [1] "GMS_introjected"
## I(x) Transformation with 979 nonmissing obs.
##
## [1] "GMS_integrated"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.293068
## - mean (before standardization) = 4.957086
## - sd (before standardization) = 1.790833
##
## [1] "GMS_amotivation"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 0.3908014
## - mean (before standardization) = 1.306458
## - sd (before standardization) = 0.8161465
##
## [1] "GNSFS_satisfaction"
## I(x) Transformation with 979 nonmissing obs.
##
## [1] "GNSFS_frustration"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 0.08081475
## - mean (before standardization) = 0.7578126
## - sd (before standardization) = 0.4652729
##
## [1] "GNSFS_satisfaction_aut"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.475195
## - mean (before standardization) = 3.795648
## - sd (before standardization) = 1.584459
##
## [1] "GNSFS_satisfaction_rel"
## Non-Standardized sqrt(x + a) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 1.927465
## - sd (before standardization) = 0.2649374
##
## [1] "GNSFS_satisfaction_comp"
## Non-Standardized Yeo-Johnson Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 2.071173
## - mean (before standardization) = 12.09834
## - sd (before standardization) = 4.401257
##
## [1] "GNSFS_frustration_aut"
## Non-Standardized sqrt(x + a) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 1.564
## - sd (before standardization) = 0.3470158
##
## [1] "GNSFS_frustration_rel"
## Non-Standardized sqrt(x + a) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 1.360759
## - sd (before standardization) = 0.3549526
##
## [1] "GNSFS_frustration_comp"
## Non-Standardized sqrt(x + a) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 1.461396
## - sd (before standardization) = 0.3772307
##
## [1] "mindset"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 0.907965
## - mean (before standardization) = 2.557307
## - sd (before standardization) = 1.30422
##
## [1] "BF_openness"
## Non-Standardized asinh(x) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - mean (before standardization) = 2.009473
## - sd (before standardization) = 0.3041531
##
## [1] "BF_conscientiousness"
## Non-Standardized Log_b(x + a) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - b = 10
## - mean (before standardization) = 0.5927222
## - sd (before standardization) = 0.1126463
##
## [1] "BF_extroversion"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 0.5492127
## - mean (before standardization) = 1.227191
## - sd (before standardization) = 0.7470442
##
## [1] "BF_agreableness"
## Non-Standardized Yeo-Johnson Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.470124
## - mean (before standardization) = 5.593312
## - sd (before standardization) = 2.03017
##
## [1] "BF_neuroticism"
## Non-Standardized sqrt(x + a) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 1.60136
## - sd (before standardization) = 0.3830472
##
## [1] "AATQ_avoidance"
## I(x) Transformation with 979 nonmissing obs.
##
## [1] "AATQ_approach"
## Non-Standardized Yeo-Johnson Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 2.060613
## - mean (before standardization) = 20.34498
## - sd (before standardization) = 6.847511
##
## [1] "MPOS_mastery_approach"
## Non-Standardized sqrt(x + a) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 2.302841
## - sd (before standardization) = 0.3004559
##
## [1] "MPOS_mastery_avoidance"
## Non-Standardized Yeo-Johnson Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.21938
## - mean (before standardization) = 5.434897
## - sd (before standardization) = 2.527489
##
## [1] "MPOS_performance_approach"
## Non-Standardized Yeo-Johnson Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.242952
## - mean (before standardization) = 5.691357
## - sd (before standardization) = 2.591905
##
## [1] "MPOS_performance_avoidance"
## Non-Standardized Yeo-Johnson Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.239604
## - mean (before standardization) = 5.725372
## - sd (before standardization) = 2.583794
##
## [1] "RFPS_flexible"
## Non-Standardized Box Cox Transformation with 979 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.999957
## - mean (before standardization) = 14.32247
## - sd (before standardization) = 5.639642
##
## [1] "RFPS_rigid"
## I(x) Transformation with 979 nonmissing obs.
##
## [1] "PGDS_autonomy"
## Non-Standardized sqrt(x + a) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 2.417327
## - sd (before standardization) = 0.196878
##
## [1] "PGDS_mastery"
## I(x) Transformation with 979 nonmissing obs.
##
## [1] "PGDS_positive"
## Non-Standardized sqrt(x + a) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 2.250554
## - sd (before standardization) = 0.3558908
##
## [1] "PGDS_acceptance"
## Non-Standardized sqrt(x + a) Transformation with 979 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 2.272361
## - sd (before standardization) = 0.3441205
##
## [1] "PGDS_purpose"
## I(x) Transformation with 979 nonmissing obs.
##
## [1] "SAS"
## Non-Standardized Yeo-Johnson Transformation with 977 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.685933
## - mean (before standardization) = 4.824146
## - sd (before standardization) = 0.7652293
##
## [1] "SAS_emotions"
## Non-Standardized double reversed Log_b(x + a) Transformation with 977 nonmissing obs.:
## Relevant statistics:
## - a =
## - b = 10
## - max(x) = 4.3 ; min(x) = 0.7
## - mean (before standardization) = 0.4421254
## - sd (before standardization) = 0.2743446
##
## [1] "SAS_autonomy"
## I(x) Transformation with 977 nonmissing obs.
##
## [1] "SAS_trust"
## Non-Standardized double reversed Log_b(x + a) Transformation with 977 nonmissing obs.:
## Relevant statistics:
## - a =
## - b = 10
## - max(x) = 4.3 ; min(x) = 0.7
## - mean (before standardization) = 0.503505
## - sd (before standardization) = 0.2493298
##
## [1] "SAS_acceptance"
## I(x) Transformation with 977 nonmissing obs.
##
## [1] "SAS_desirability"
## Non-Standardized Yeo-Johnson Transformation with 977 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.702695
## - mean (before standardization) = 5.600014
## - sd (before standardization) = 1.717305
##
## [1] "growth_life"
## Non-Standardized Yeo-Johnson Transformation with 975 nonmissing obs.:
## Estimated statistics:
## - lambda = 2.786245
## - mean (before standardization) = 73.06148
## - sd (before standardization) = 29.18779
##
## [1] "GMI"
## Non-Standardized Yeo-Johnson Transformation with 975 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.883414
## - mean (before standardization) = 15.50796
## - sd (before standardization) = 5.671237
##
## [1] "GMI_reflective"
## Non-Standardized Box Cox Transformation with 975 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.546375
## - mean (before standardization) = 6.758422
## - sd (before standardization) = 3.116057
##
## [1] "GMI_experiential"
## Non-Standardized Yeo-Johnson Transformation with 975 nonmissing obs.:
## Estimated statistics:
## - lambda = 2.454685
## - mean (before standardization) = 41.71894
## - sd (before standardization) = 16.58612
##
## [1] "growth_oldham"
## Non-Standardized Yeo-Johnson Transformation with 975 nonmissing obs.:
## Estimated statistics:
## - lambda = 2.35361
## - mean (before standardization) = 37.03443
## - sd (before standardization) = 12.97824
##
## [1] "PEQ"
## Non-Standardized sqrt(x + a) Transformation with 973 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 2.152261
## - sd (before standardization) = 0.2364469
##
## [1] "PEQ_augmentation"
## Non-Standardized double reversed Log_b(x + a) Transformation with 973 nonmissing obs.:
## Relevant statistics:
## - a =
## - b = 10
## - max(x) = 7.6 ; min(x) = 0.4
## - mean (before standardization) = 0.5395458
## - sd (before standardization) = 0.2953644
##
## [1] "PEQ_novelty"
## Non-Standardized double reversed Log_b(x + a) Transformation with 973 nonmissing obs.:
## Relevant statistics:
## - a =
## - b = 10
## - max(x) = 7.6 ; min(x) = 0.4
## - mean (before standardization) = 0.3757817
## - sd (before standardization) = 0.1751795
##
## [1] "PGI"
## Non-Standardized Yeo-Johnson Transformation with 972 nonmissing obs.:
## Estimated statistics:
## - lambda = 1.923902
## - mean (before standardization) = 12.98004
## - sd (before standardization) = 4.660486
Note. The I(x) transformations above are not actually transformations, but a shorthand function for passing the data “as is”. This suggests that, for those variables, the package estimated that none of the attempted transformations improved normality, so no transformation is used. This only appears when standardize is set to FALSE. When it is set to TRUE, those variables instead show center_scale(x), indicating that the data are only CENTERED because they need no transformation (no need to be scaled), only to be centered.
Let’s check if normality was corrected.
# Group normality
# Re-check normality, labelling each result with the transformation that
# was applied (read from the "transform" attribute set by
# predict_bestNormalize).
# NOTE(review): lapply(data, ...) iterates over ALL columns and relies on
# unlist() dropping the NULLs from columns lacking a "transform"
# attribute; this assumes the attributed columns align one-to-one, in
# order, with col.list -- verify, since a stray attribute elsewhere in the
# data would misalign the labels.
named.col.list <- setNames(col.list, unlist(lapply(data, function(x) attributes(x)$transform)))
lapply(named.col.list, function(x)
nice_normality(data,
variable = x,
title = x,
shapiro = TRUE,
histogram = TRUE))## $no_transform
##
## $no_transform
##
## $yeojohnson
##
## $yeojohnson
##
## $boxcox
##
## $boxcox
##
## $boxcox
##
## $no_transform
##
## $yeojohnson
##
## $boxcox
##
## $boxcox
##
## $no_transform
##
## $no_transform
##
## $boxcox
##
## $boxcox
##
## $no_transform
##
## $boxcox
##
## $boxcox
##
## $sqrt_x
##
## $yeojohnson
##
## $sqrt_x
##
## $sqrt_x
##
## $sqrt_x
##
## $boxcox
##
## $arcsinh_x
##
## $log_x
##
## $boxcox
##
## $yeojohnson
##
## $sqrt_x
##
## $no_transform
##
## $yeojohnson
##
## $sqrt_x
##
## $yeojohnson
##
## $yeojohnson
##
## $yeojohnson
##
## $boxcox
##
## $no_transform
##
## $sqrt_x
##
## $no_transform
##
## $sqrt_x
##
## $sqrt_x
##
## $no_transform
##
## $yeojohnson
##
## $double_reverse_log
##
## $no_transform
##
## $double_reverse_log
##
## $no_transform
##
## $yeojohnson
##
## $yeojohnson
##
## $yeojohnson
##
## $boxcox
##
## $yeojohnson
##
## $yeojohnson
##
## $sqrt_x
##
## $double_reverse_log
##
## $double_reverse_log
##
## $yeojohnson
Looks rather reasonable now, though not perfect (fortunately contrasts are quite robust against violations of normality).
We can now resume with checking outliers.
We check outliers visually with the plot_outliers
function, which draws red lines at +/- 3 median absolute deviations.
# Visual outlier check: one plot per variable, with red cutoff lines drawn
# at +/- 3 median absolute deviations (MAD), arranged in two columns.
plots(lapply(col.list, function(x) {
plot_outliers(data, response = x, ytitle = x, binwidth = 0.3)
}),
n_columns = 2)There are some outliers, but nothing unreasonable. Let’s still check with the 3 median absolute deviations (MAD) method.
## 236 outlier(s) based on 3 median absolute deviations for variable(s):
## aut_growth.t, aut_deficit.t, comp_growth.t, comp_deficit.t, rel_growth.t, rel_deficit.t, OP.t, HP.t, ofis.wellbeing.t, GMS_identified.t, GMS_intrinsic.t, GMS_external.t, GMS_introjected.t, GMS_integrated.t, GMS_amotivation.t, GNSFS_satisfaction.t, GNSFS_frustration.t, GNSFS_satisfaction_aut.t, GNSFS_satisfaction_rel.t, GNSFS_satisfaction_comp.t, GNSFS_frustration_aut.t, GNSFS_frustration_rel.t, GNSFS_frustration_comp.t, mindset.t, BF_openness.t, BF_conscientiousness.t, BF_extroversion.t, BF_agreableness.t, BF_neuroticism.t, AATQ_avoidance.t, AATQ_approach.t, MPOS_mastery_approach.t, MPOS_mastery_avoidance.t, MPOS_performance_approach.t, MPOS_performance_avoidance.t, RFPS_flexible.t, RFPS_rigid.t, PGDS_autonomy.t, PGDS_mastery.t, PGDS_positive.t, PGDS_acceptance.t, PGDS_purpose.t, SAS.t, SAS_emotions.t, SAS_autonomy.t, SAS_trust.t, SAS_acceptance.t, SAS_desirability.t, growth_life.t, GMI.t, GMI_reflective.t, GMI_experiential.t, growth_oldham.t, PEQ.t, PEQ_augmentation.t, PEQ_novelty.t, PGI.t
##
## The following participants were considered outliers for more than one variable:
##
## Row n
## 1 1 2
## 2 52 3
## 3 54 3
## 4 57 2
## 5 71 4
## 6 112 3
## 7 142 2
## 8 147 2
## 9 150 3
## 10 155 2
## 11 178 2
## 12 180 5
## 13 183 2
## 14 196 8
## 15 199 2
## 16 233 4
## 17 247 3
## 18 261 3
## 19 274 4
## 20 283 2
## 21 287 3
## 22 303 2
## 23 310 2
## 24 318 2
## 25 323 3
## 26 361 5
## 27 362 4
## 28 365 2
## 29 380 5
## 30 398 3
## 31 419 5
## 32 452 2
## 33 498 6
## 34 499 2
## 35 501 2
## 36 516 3
## 37 542 6
## 38 549 2
## 39 564 2
## 40 590 2
## 41 624 3
## 42 631 2
## 43 656 3
## 44 660 2
## 45 662 2
## 46 703 5
## 47 715 3
## 48 727 6
## 49 745 2
## 50 749 2
## 51 773 2
## 52 782 2
## 53 783 6
## 54 787 2
## 55 792 3
## 56 803 2
## 57 809 2
## 58 811 2
## 59 819 3
## 60 825 7
## 61 839 2
## 62 865 2
## 63 866 6
## 64 870 3
## 65 881 3
## 66 897 3
## 67 915 2
## 68 924 6
## 69 926 5
## 70 933 2
## 71 935 2
## 72 940 5
## 73 976 4
##
## Outliers per variable:
##
## $aut_growth.t
## Row aut_growth.t_mad
## 1 52 -3.372454
## 2 103 -3.147624
## 3 164 -4.496605
## 4 178 -4.946266
## 5 258 -3.597284
## 6 310 -3.147624
## 7 365 -3.147624
## 8 752 -3.597284
## 9 927 -3.597284
## 10 940 -3.147624
##
## $HP.t
## Row HP.t_mad
## 1 180 -3.372454
## 2 318 -3.102657
## 3 493 -3.372454
## 4 792 -3.372454
## 5 811 -3.102657
## 6 819 -3.372454
## 7 825 -3.372454
##
## $GNSFS_satisfaction.t
## Row GNSFS_satisfaction.t_mad
## 1 161 -3.035208
## 2 196 -3.147624
## 3 247 -3.372454
## 4 274 -3.035208
## 5 287 -3.260039
## 6 361 -3.597284
## 7 624 -3.260039
## 8 656 -3.035208
## 9 703 -3.035208
## 10 783 -3.597284
## 11 825 -3.484869
## 12 862 -3.035208
## 13 870 -3.260039
## 14 976 -3.035208
##
## $GNSFS_satisfaction_rel.t
## Row GNSFS_satisfaction_rel.t_mad
## 1 150 -3.315016
## 2 156 -3.315016
## 3 196 -3.758667
## 4 274 -3.315016
## 5 287 -3.758667
## 6 299 -3.315016
## 7 361 -3.315016
## 8 384 -3.758667
## 9 537 -3.315016
## 10 554 -3.315016
## 11 631 -3.758667
## 12 656 -3.315016
## 13 727 -3.758667
## 14 782 -3.315016
## 15 783 -3.315016
## 16 825 -3.758667
## 17 870 -3.758667
## 18 897 -3.758667
## 19 915 -3.315016
##
## $BF_openness.t
## Row BF_openness.t_mad
## 1 142 -3.758792
## 2 183 -3.758792
## 3 199 -3.758792
## 4 320 -3.758792
## 5 391 -3.758792
## 6 423 -3.758792
## 7 452 -3.758792
## 8 472 -3.758792
## 9 564 -3.758792
## 10 703 -3.758792
## 11 715 -3.758792
## 12 773 -3.758792
## 13 792 -3.758792
## 14 819 -3.758792
## 15 870 -3.758792
## 16 929 -3.758792
## 17 933 -3.758792
## 18 934 -3.758792
## 19 935 -3.758792
## 20 940 -3.758792
## 21 951 -3.758792
##
## $BF_conscientiousness.t
## Row BF_conscientiousness.t_mad
## 1 745 -4.190319
## 2 926 -4.190319
## 3 940 -4.190319
##
## $MPOS_mastery_approach.t
## Row MPOS_mastery_approach.t_mad
## 1 1 -3.028616
## 2 4 -3.028616
## 3 52 -3.491287
## 4 114 -5.088669
## 5 159 -6.447824
## 6 178 -4.513146
## 7 183 -3.491287
## 8 196 -3.491287
## 9 206 -3.028616
## 10 225 -3.028616
## 11 233 -3.491287
## 12 261 -3.028616
## 13 262 -3.028616
## 14 264 -6.447824
## 15 271 -3.028616
## 16 276 -3.983899
## 17 283 -3.028616
## 18 284 -4.513146
## 19 287 -3.028616
## 20 296 -3.028616
## 21 308 -5.088669
## 22 310 -3.491287
## 23 315 -3.028616
## 24 318 -3.028616
## 25 323 -3.028616
## 26 331 -3.028616
## 27 334 -3.983899
## 28 342 -3.491287
## 29 380 -6.447824
## 30 390 -3.983899
## 31 443 -3.028616
## 32 452 -3.491287
## 33 456 -4.513146
## 34 464 -3.028616
## 35 475 -5.725260
## 36 492 -3.983899
## 37 498 -3.028616
## 38 499 -3.983899
## 39 504 -3.491287
## 40 516 -3.028616
## 41 533 -3.028616
## 42 536 -3.028616
## 43 542 -3.983899
## 44 543 -6.447824
## 45 609 -3.028616
## 46 660 -3.028616
## 47 662 -6.447824
## 48 673 -3.028616
## 49 685 -5.725260
## 50 692 -3.983899
## 51 745 -6.447824
## 52 765 -3.028616
## 53 780 -3.491287
## 54 782 -3.028616
## 55 787 -3.491287
## 56 792 -3.491287
## 57 803 -3.491287
## 58 825 -5.088669
## 59 838 -3.028616
## 60 839 -5.088669
## 61 866 -6.447824
## 62 906 -3.028616
## 63 907 -3.491287
## 64 924 -4.513146
## 65 925 -3.491287
##
## $PGDS_autonomy.t
## Row PGDS_autonomy.t_mad
## 1 71 -3.652144
## 2 112 -3.652144
## 3 180 -3.652144
## 4 196 -3.652144
## 5 233 -3.175207
## 6 361 -3.652144
## 7 362 -3.175207
## 8 398 -3.175207
## 9 419 -3.652144
## 10 498 -3.652144
## 11 542 -3.652144
## 12 549 -3.175207
## 13 703 -3.175207
## 14 727 -3.175207
## 15 783 -3.175207
## 16 809 -3.175207
## 17 825 -3.652144
## 18 881 -3.175207
## 19 924 -3.652144
## 20 926 -3.652144
##
## $PGDS_mastery.t
## Row PGDS_mastery.t_mad
## 1 71 -3.035208
## 2 112 -3.035208
## 3 196 -3.035208
## 4 361 -3.035208
## 5 398 -3.035208
## 6 419 -3.035208
## 7 498 -3.035208
## 8 542 -3.035208
## 9 727 -3.035208
## 10 783 -3.035208
## 11 866 -3.035208
## 12 924 -3.035208
## 13 926 -3.035208
## 14 976 -3.035208
##
## $PGDS_positive.t
## Row PGDS_positive.t_mad
## 1 52 -3.699143
## 2 54 -3.073624
## 3 57 -3.073624
## 4 71 -4.441126
## 5 142 -3.699143
## 6 147 -3.073624
## 7 150 -3.699143
## 8 180 -3.073624
## 9 196 -4.441126
## 10 208 -3.073624
## 11 233 -3.073624
## 12 240 -3.073624
## 13 247 -3.699143
## 14 261 -3.073624
## 15 273 -3.699143
## 16 274 -4.441126
## 17 279 -3.073624
## 18 303 -3.699143
## 19 323 -3.073624
## 20 348 -3.073624
## 21 362 -4.441126
## 22 380 -4.441126
## 23 419 -3.073624
## 24 451 -4.441126
## 25 454 -4.441126
## 26 498 -4.441126
## 27 501 -3.073624
## 28 516 -3.073624
## 29 542 -3.073624
## 30 549 -3.699143
## 31 590 -3.699143
## 32 606 -3.073624
## 33 610 -3.073624
## 34 612 -3.073624
## 35 624 -3.699143
## 36 703 -3.699143
## 37 726 -3.699143
## 38 727 -3.699143
## 39 783 -4.441126
## 40 787 -3.073624
## 41 803 -3.073624
## 42 809 -3.073624
## 43 811 -3.073624
## 44 825 -3.073624
## 45 839 -4.441126
## 46 866 -4.441126
## 47 881 -3.073624
## 48 897 -4.441126
## 49 915 -3.073624
## 50 924 -4.441126
## 51 926 -3.073624
## 52 935 -3.699143
## 53 976 -3.699143
##
## $PGDS_acceptance.t
## Row PGDS_acceptance.t_mad
## 1 54 -3.073624
## 2 57 -3.073624
## 3 147 -3.073624
## 4 150 -3.073624
## 5 180 -3.699143
## 6 196 -4.441126
## 7 226 -3.073624
## 8 233 -4.441126
## 9 247 -3.699143
## 10 257 -3.073624
## 11 261 -3.073624
## 12 323 -3.073624
## 13 339 -3.073624
## 14 361 -3.073624
## 15 362 -3.073624
## 16 380 -4.441126
## 17 398 -4.441126
## 18 419 -4.441126
## 19 498 -4.441126
## 20 516 -3.073624
## 21 542 -4.441126
## 22 564 -3.073624
## 23 590 -3.699143
## 24 624 -3.073624
## 25 656 -4.441126
## 26 660 -3.699143
## 27 703 -4.441126
## 28 715 -3.699143
## 29 727 -4.441126
## 30 749 -3.073624
## 31 783 -4.441126
## 32 785 -3.073624
## 33 825 -3.073624
## 34 866 -4.441126
## 35 881 -3.073624
## 36 919 -3.073624
## 37 924 -4.441126
## 38 926 -3.073624
## 39 960 -3.073624
## 40 976 -3.699143
##
## $PGDS_purpose.t
## Row PGDS_purpose.t_mad
## 1 54 -3.035208
## 2 112 -3.035208
## 3 180 -3.035208
## 4 196 -3.035208
## 5 309 -3.035208
## 6 362 -3.035208
## 7 380 -3.035208
## 8 419 -3.035208
## 9 498 -3.035208
## 10 501 -3.035208
## 11 542 -3.035208
## 12 715 -3.035208
## 13 727 -3.035208
## 14 749 -3.035208
## 15 808 -3.035208
## 16 819 -3.035208
## 17 866 -3.035208
## 18 924 -3.035208
##
## $SAS.t
## Row SAS.t_mad
## 1 631 -3.348864
## 2 865 3.093705
##
## $SAS_autonomy.t
## Row SAS_autonomy.t_mad
## 1 56 3.035208
##
## $SAS_trust.t
## Row SAS_trust.t_mad
## 1 2 3.338322
## 2 5 3.338322
## 3 8 3.338322
## 4 23 3.338322
## 5 26 3.338322
## 6 33 3.338322
## 7 40 3.338322
## 8 64 3.338322
## 9 76 3.338322
## 10 97 3.338322
## 11 135 3.338322
## 12 155 3.338322
## 13 182 3.338322
## 14 189 3.338322
## 15 207 3.338322
## 16 212 3.338322
## 17 223 3.338322
## 18 241 3.338322
## 19 245 3.338322
## 20 282 3.338322
## 21 283 3.338322
## 22 293 3.338322
## 23 305 3.338322
## 24 317 3.338322
## 25 324 3.338322
## 26 346 3.338322
## 27 356 3.338322
## 28 365 3.338322
## 29 367 3.338322
## 30 376 3.338322
## 31 377 3.338322
## 32 381 3.338322
## 33 393 3.338322
## 34 406 3.338322
## 35 408 3.338322
## 36 410 3.338322
## 37 421 3.338322
## 38 424 3.338322
## 39 437 3.338322
## 40 444 3.338322
## 41 448 3.338322
## 42 453 3.338322
## 43 458 3.338322
## 44 465 3.338322
## 45 486 3.338322
## 46 489 3.338322
## 47 497 3.338322
## 48 499 3.338322
## 49 500 3.338322
## 50 526 3.338322
## 51 528 3.338322
## 52 544 3.338322
## 53 558 3.338322
## 54 571 3.338322
## 55 588 3.338322
## 56 592 3.338322
## 57 598 3.338322
## 58 599 3.338322
## 59 611 3.338322
## 60 616 3.338322
## 61 645 3.338322
## 62 662 3.338322
## 63 686 3.338322
## 64 729 3.338322
## 65 730 3.338322
## 66 736 3.338322
## 67 740 3.338322
## 68 746 3.338322
## 69 755 3.338322
## 70 766 3.338322
## 71 772 3.338322
## 72 788 3.338322
## 73 791 3.338322
## 74 804 3.338322
## 75 814 3.338322
## 76 854 3.338322
## 77 857 3.338322
## 78 860 3.338322
## 79 864 3.338322
## 80 865 3.338322
## 81 875 3.338322
## 82 886 3.338322
## 83 889 3.338322
## 84 896 3.338322
## 85 909 3.338322
## 86 933 3.338322
## 87 940 3.338322
## 88 941 3.338322
## 89 962 3.338322
## 90 971 3.338322
##
## $PEQ.t
## Row PEQ.t_mad
## 1 71 -3.942159
## 2 199 -3.441274
## 3 274 -3.283744
## 4 303 -3.441274
## 5 380 -3.770051
## 6 706 -3.942159
## 7 773 -3.441274
## 8 866 -3.603244
## 9 940 -4.696023
##
## $PEQ_novelty.t
## Row PEQ_novelty.t_mad
## 1 1 3.098949
## 2 155 3.773440
## 3 272 3.098949
## 4 638 3.098949
## 5 856 3.773440
## 6 897 3.773440
## 7 950 3.098949
After our transformations, there are 287 outliers. Even though our sample is very large, that’s about a third of our sample.
Visual assessment and the MAD method confirm we have some outlier values. We could ignore them but because they could have disproportionate influence on the models, one recommendation is to winsorize them by bringing the values at 3 SD. Instead of using the standard deviation around the mean, however, we use the absolute deviation around the median, as it is more robust to extreme observations. For a discussion, see:
Leys, C., Klein, O., Bernard, P., & Licata, L. (2013). Detecting outliers: Do not use standard deviation around the mean, use absolute deviation around the median. Journal of Experimental Social Psychology, 49(4), 764–766. https://doi.org/10.1016/j.jesp.2013.03.013
# Winsorize variables of interest with MAD
# Caps extreme values at +/- 3 median absolute deviations instead of
# excluding them, so outliers keep their rank order but lose undue
# influence on the models. New columns get a ".w" suffix.
data <- data %>%
mutate(across(all_of(col.list),
winsorize_mad,
.names = "{.col}.w")) %>%
ungroup()
# Update col.list
# (switch downstream analyses to the winsorized ".w" versions)
col.list <- paste0(col.list, ".w")
Outliers are still present but were brought back within reasonable limits, where applicable.
For multivariate outliers, it is recommended to use the Minimum Covariance Determinant, a robust version of the Mahalanobis distance (MCD, Leys et al., 2019).
Leys, C., Delacre, M., Mora, Y. L., Lakens, D., & Ley, C. (2019). How to classify, detect, and manage univariate and multivariate outliers, with emphasis on pre-registration. International Review of Social Psychology, 32(1).
However, in our preregistration we indicated we would use the regular Mahalanobis distance because its robust version generally tags a lot of outliers.
# Build the analysis data frame from the winsorized columns, then strip
# the ".t.w" suffixes so downstream output uses the original names.
data.vars <- data[col.list]
# NOTE(review): ".t.w" is a regex with unescaped dots; it works here
# because the literal suffix always matches, but "\\.t\\.w$" would be
# safer -- confirm no other column names could match accidentally.
col.list <- gsub(".t.w", "", col.list)
names(data.vars) <- col.list
# Error in solve.default(cov, ...) :
# system is computationally singular: reciprocal condition number
# We have to remove global averages to make it work
# We have to exclude two variables that are too large following the transformations, the GNSFS_comp variables...
# data.vars[c(7:8, 52, 22, 45, 25)] %>%
# names()
# Multivariate outlier detection via the (non-robust) Mahalanobis
# distance, as per the preregistration; a few aggregate/collinear
# variables are dropped first to keep the covariance matrix invertible.
outliers <- data.vars %>% # [-c(7:8, 52, 22, 45, 25)] %>%
select(-c(#growth.orientation, deficit.orientation,
GMI, SAS, #PEQ,
GNSFS_satisfaction_comp,
GNSFS_frustration_comp
)) %>%
na.omit() %>%
check_outliers(method = "mahalanobis")
outliers
## 73 outliers detected: cases 1, 37, 89, 106, 120, 137, 150, 159, 199,
## 233, 258, 274, 275, 280, 299, 303, 314, 355, 360, 364, 375, 378, 382,
## 416, 451, 458, 477, 484, 495, 501, 521, 525, 533, 539, 541, 550, 560,
## 582, 589, 592, 602, 617, 620, 626, 652, 671, 691, 693, 701, 721, 726,
## 739, 741, 765, 775, 776, 777, 780, 787, 812, 859, 863, 867, 874, 890,
## 912, 920, 926, 933, 953, 956, 962, 969.
## - Based on the following method and threshold: mahalanobis (90.573).
## - For variables: aut_growth, aut_deficit, comp_growth, comp_deficit,
## rel_growth, rel_deficit, OP, HP, ofis.wellbeing, GMS_identified,
## GMS_intrinsic, GMS_external, GMS_introjected, GMS_integrated,
## GMS_amotivation, GNSFS_satisfaction, GNSFS_frustration,
## GNSFS_satisfaction_aut, GNSFS_satisfaction_rel, GNSFS_frustration_aut,
## GNSFS_frustration_rel, mindset, BF_openness, BF_conscientiousness,
## BF_extroversion, BF_agreableness, BF_neuroticism, AATQ_avoidance,
## AATQ_approach, MPOS_mastery_approach, MPOS_mastery_avoidance,
## MPOS_performance_approach, MPOS_performance_avoidance, RFPS_flexible,
## RFPS_rigid, PGDS_autonomy, PGDS_mastery, PGDS_positive, PGDS_acceptance,
## PGDS_purpose, SAS_emotions, SAS_autonomy, SAS_trust, SAS_acceptance,
## SAS_desirability, growth_life, GMI_reflective, GMI_experiential,
## growth_oldham, PEQ, PEQ_augmentation, PEQ_novelty, PGI.
There are 73 multivariate outliers according to the Mahalanobis distance method. That’s about 7% of our sample. Let’s exclude them.
## [1] 0.07456588
## [1] "906 participants ()"
##
##
## [Correlation matrix 'cormatrix.xlsx' has been saved to working directory (or where specified).]
## Warning in xl_open.default(paste0(filename, ".xlsx")): will not open file when
## not interactive
## NULL
In this report, we aimed to validate the Needs Orientation Scale, and we found that, so far, the fit is not excellent.
The package references and the full script of executable code contained in this document are reproduced in the tabs below.
The analysis was done using the R Statistical language (v4.2.0; R Core Team, 2022) on Windows 10 x64, using the packages flextable (v0.9.3.2), sjlabelled (v1.2.0), parameters (v0.21.1), performance (v0.10.4), see (v0.8.0), report (v0.5.7.10), correlation (v0.8.4), datawizard (v0.8.0), bestNormalize (v1.9.0), nFactors (v2.4.1.1), lavaan (v0.6.15), lattice (v0.21.8), lavaanExtra (v0.1.6), rempsyc (v0.1.4.1), visdat (v0.6.0), dplyr (v1.1.2), purrr (v1.0.1), tidyr (v1.3.0) and psych (v2.3.6).
library(rempsyc)
library(lavaanExtra)
library(lavaan)
library(dplyr)
library(purrr)
library(parameters)
library(see)
library(performance)
library(correlation)
library(flextable)
library(sjlabelled)
library(datawizard)
library(report)
library(visdat)
library(bestNormalize)
library(tidyr)
library(psych)
library(nFactors)
# Read data
# The raw survey export was converted once via the commented lines
# below, then cached as an .rds file for faster, reproducible loading.
# source("data/survey_448961_R_syntax_file_numeric.R")
# saveRDS(data, "data/raw_data_processed.rds")
data <- readRDS("data/raw_data_processed.rds")
# Demographic summary of the full (pre-exclusion) sample
report_participants(data, threshold = 1) %>% cat
# Export the correlation matrix of the final analysis variables
data.vars %>%
cormatrix_excel("cormatrix", print.mat = FALSE)
# Session/package information for the reproducibility appendix
sessionInfo() %>% report %>% summary
report::cite_packages(sessionInfo())
# Check names/row numbers
# names(data)
# Convert raw survey item codes (e.g., "NR_D1", "NCb_G9") into
# descriptive names (e.g., "needs_rel_deficit1", "needs_comp_growth9").
# Substitutions are applied in order: first drop the lowercase a/b
# version markers, then expand each single-letter code.
rename_needs <- function(x) {
  substitutions <- c(
    "a|b" = "",       # drop form-version markers
    "N" = "needs_",   # scale prefix
    "R" = "rel",      # relatedness
    "A" = "aut",      # autonomy
    "C" = "comp",     # competence
    "D" = "deficit",  # deficit-reduction orientation
    "G" = "growth"    # growth orientation
  )
  for (pattern in names(substitutions)) {
    x <- gsub(pattern, substitutions[[pattern]], x)
  }
  x
}
# Columns of the Needs Orientation Scale items, in raw-export naming.
needs.names <- data %>%
  select(NR_D1:NCb_G9) %>%
  names
# Apply the descriptive needs-item names, then move the reverse-coded
# "r" marker from prefix position (e.g., "BF_rE1") to suffix position
# ("BF_E1r") so reverse-coded items can be selected with ends_with("r").
data <- data %>%
  rename_with(~rename_needs(.x), all_of(needs.names)) %>%
  rename(
    BF_E1r = "BF_rE1",
    BF_C3r = "BF_rC3",
    BF_N4r = "BF_rN4",
    BF_O5r = "BF_rO5",
    BF_A7r = "BF_rA7",
    SAc_ACC6r = "SAc_rACC6",
    SAc_ACC8r = "SAc_rACC8",
    SAc_TR13r = "SAc_rTR13",
    SAc_ACC14r = "SAc_rACC14",
    PEQ_Au3r = "PEQ_rAu3",
    PEQ_Au5r = "PEQ_rAu5",
    PEQ_No6r = "PEQ_rNo6",
    PEQ_Au7r = "PEQ_rAu7",
    PEQ_Au9r = "PEQ_rAu9"
  )
# Spot-check: the self-acceptance items should now carry the new names.
data %>%
  select(starts_with("SAc_")) %>%
  names
# Get data labels
# Turn the variable-label attribute into a one-row data frame so each
# item's full question text can be looked up by column name.
dataset.labels <- data.frame(vars = attributes(data)$variable.labels) |>
t() |>
as.data.frame()
# NOTE(review): assumes the first 244 columns of `data` are the
# labelled survey items, in the same order as the label attribute --
# confirm against the raw export if columns are ever added/reordered.
names(dataset.labels) <- names(data[1:244])
# Keep only the text inside the first pair of square brackets
# (survey labels look like "Question stem [item label] ...").
# Strings without brackets are returned unchanged.
extract_labels <- function(x) {
  gsub(".*?\\[(.*?)\\].*", "\\1", x)
}
# Strip the question stems, keeping only the bracketed item labels.
dataset.labels <- dataset.labels %>%
lapply(extract_labels) %>%
as.data.frame()
# Test it
# (spot-check a few labels to confirm the extraction worked)
dataset.labels$Sco
dataset.labels$Ra_4
dataset.labels$needs_rel_growth4
# Define convenience function
# Normalize participant-entered codes: uppercase, trim surrounding
# whitespace, and drop every character that is not a digit or capital
# letter. Non-character input passes through untouched.
trimws_toupper <- function(x) {
  if (!is.character(x)) {
    return(x)
  }
  cleaned <- trimws(toupper(x))
  gsub("[^0-9A-Z]+", "", cleaned)
}
# Which participants have entered an ID != 11?
# Inspect (interactively) codes outside the expected 12-16 character
# range; nothing is excluded at this step.
data %>%
mutate(Code = trimws_toupper(Code)) %>%
filter(nchar(Code) < 12 | nchar(Code) > 16) %>%
select(id, Code) %>%
View()
# 14 people with weird codes.
# Let's exclude "REMITEST" at least since that was me.
data <- data %>%
mutate(Code = trimws_toupper(Code)) %>%
#filter(nchar(Code) >= 12 & nchar(Code) <= 16)
filter(Code != "REMITEST")
# This also removes 17 rows with no ID or responses at all
# Caro: should we exclude those with wrong participant IDs?
# Check for duplicate Respondent ID!!
sum(duplicated(data$Code))
# There's 1 duplicates Respondent IDs
# Let's investigate them
data %>%
rempsyc::extract_duplicates("Code") %>%
View()
# Let's keep the best duplicate
# (rempsyc::best_duplicate keeps the most complete of the duplicates)
data <- data %>%
best_duplicate("Code")
# Get the names of the attention check items
ATT <- data %>%
select(contains("ATT")) %>%
names()
ATT
# Extract the correct answers:
dataset.labels[ATT]
# Modal response per check (sanity check that most people answered
# the instructed value)
data[ATT] %>%
summarize(across(everything(), \(x) distribution_mode(x)))
# Let's create a new column to see if they got it right
# (expected answers: 1, 7, and 4, per the item labels above)
data <- data %>%
mutate(attention1 = .[[ATT[1]]] == 1,
attention2 = .[[ATT[2]]] == 7,
attention3 = .[[ATT[3]]] == 4)
# How many people didn't give the correct attention check 1?
nrow(data) - sum(data$attention1, na.rm = TRUE)
# 185 people
# How many people didn't give the correct attention check 2?
nrow(data) - sum(data$attention2, na.rm = TRUE)
# 210 people
# How many people didn't give the correct attention check 3?
nrow(data) - sum(data$attention3, na.rm = TRUE)
# 244 people
# Calculate sum of attention scores
data <- data %>%
mutate(attention.total = rowSums(select(
., attention1, attention2, attention3),
na.rm = TRUE))
# Check the distribution of scores
data %>%
count(attention.total)
# Exclude those with less than two correct answers
data <- data %>%
filter(attention.total >= 2)
# Now at 980 rows
# Check for duplicate IP addresses!!
sum(duplicated(data$ipaddr))
# There's 1 duplicate IP addresses
# Let's investigate them
data %>%
rempsyc::extract_duplicates("ipaddr") %>%
View()
# Remove duplicated IP Addresses
data <- data %>%
best_duplicate("ipaddr")
# After removing duplicated IP addresses, new n = 971
# Calculate sum of missing values, per row
# (count and proportion of NAs across all questionnaire items)
data <- data %>%
mutate(sum.NA = rowSums(is.na(select(
., needs_rel_deficit1:WB_4))),
NA.percent = round(sum.NA / ncol(select(
., needs_rel_deficit1:WB_4)), 2))
# Count how many missing values
data %>%
count(sum.NA, NA.percent)
# Check filter for excluding those with more than 50% missing values
data %>%
filter(NA.percent > 0.50) %>%
select(Code, sum.NA, NA.percent)
# Nobody to exclude. Final N = 980
# NOTE(review): the comment above says N = 980, but after the IP
# deduplication the running n was reported as 971 -- confirm which
# figure is correct.
# Demographic summary of the post-exclusion sample
report_participants(data, threshold = 1) %>% cat
data %>%
nice_density("Age", histogram = TRUE)
data %>%
count(Gender, sort = TRUE)
dataset.labels$Sco
data %>%
count(Sco, sort = TRUE)
dataset.labels$Ra_other
# Recode the race checkboxes into a single column; anyone selecting
# multiple categories (label contains a comma) or none is coded "Other".
data <- data %>%
mutate(Ra_1 = ifelse(Ra_1 == "Yes", "White", NA),
Ra_2 = ifelse(Ra_2 == "Yes", "Black", NA),
Ra_3 = ifelse(Ra_3 == "Yes", "Native", NA),
Ra_4 = ifelse(Ra_4 == "Yes", "Asian", NA),
Ra_5 = ifelse(Ra_5 == "Yes", "Hawaiian", NA),
Ra_6 = ifelse(Ra_6 == "Yes", "Other", NA)) %>%
unite("Race", Ra_1:Ra_6, na.rm = TRUE, sep = ", ") %>%
mutate(Race = ifelse(grepl(",", .data$Race), "Other", Race),
Race = ifelse(Race == "", "Other", Race))
data %>%
count(Race, sort = TRUE)
# Check for nice_na
# (missingness per questionnaire scale)
nice_na(data, scales = c(
"NR_", "NA_", "NC_", "GMS_", "N1_", "N2_", "MS_", "P_", "BF_",
"Tem_", "MPO_", "Pers_", "PGD_", "SAc_", "GiL_", "GMI_", "Gr_",
"PEQ_", "PGI_", "WB_"))
# Visualize the missing-data pattern across all items
data %>%
select(needs_rel_deficit1:WB_4) %>%
vis_miss
# Randomly split the sample in half: one half for the EFA here, the
# other half (the complement) for the CFA later. Seed fixed for
# reproducibility.
set.seed(100)
row_indices <- sample(seq_len(nrow(data)),
size = nrow(data) / 2)
data_EFA <- data %>%
rename_with(~gsub("needs_", "", .)) %>%
select(rel_deficit1:comp_growth9) %>%
slice(row_indices)
# Interactive item inter-correlation plot
x <- data_EFA %>%
correlation(p_adjust = "none") %>%
summary() %>%
plot()
plotly::ggplotly(x)
data_EFA %>%
cormatrix_excel("items_matrix", print.mat = FALSE)
dataset.labels$needs_rel_deficit6
# Factorability checks: Bartlett's test of sphericity and KMO
cortest.bartlett(data_EFA)
x <- KMO(data_EFA)
# Overall KMO
x$MSA %>%
round(2)
# Per-item sampling adequacy
x$MSAi %>%
sort(decreasing = TRUE) %>%
round(2)
# determinant of the correlation matrix
# (should be > 0.00001 to rule out extreme multicollinearity)
r_matrix <- cor(data_EFA, use = "pairwise.complete.obs")
det(r_matrix)
# Unrotated PCA on all components, for variance-accounted inspection
pc1 <- principal(r_matrix, nfactors = 50, rotate = "none")
print(pc1, cut = 0.2)
# Determine Number of Factors to Extract
# Scree test + parallel analysis (nFactors package)
ev <- eigen(cor(data_EFA)) # get eigenvalues
ap <- parallel(subject = nrow(data_EFA), var = ncol(data_EFA),
rep = 100, cent = .05)
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS)
# Cumulative variance explained at 4 components
pc1$Vaccounted %>%
as.data.frame() %>%
select(PC4) %>%
filter(row.names(.) == "Cumulative Proportion")
# ev$values # print eigenvalues
# Determine eigen values > 1
# (Kaiser criterion)
HighEigenValues <- ev$values[ev$values > 1]
HighEigenValues
length(HighEigenValues)
# Cumulative variance explained at 7 components
pc1$Vaccounted %>%
as.data.frame() %>%
select(PC7) %>%
filter(row.names(.) == "Cumulative Proportion")
# More liberal Jolliffe criterion (eigenvalues > 0.7)
HighEigenValues <- ev$values[ev$values > 0.7]
HighEigenValues
length(HighEigenValues)
pc1$Vaccounted %>%
as.data.frame() %>%
select(PC11) %>%
filter(row.names(.) == "Cumulative Proportion")
# Communalities
dataForScreePlot <- factanal(data_EFA, factors = 4, rotation = "varimax")
itemCommunalities <- 1 - dataForScreePlot$uniquenesses;
# List items with low communalities ( < .70).
itemsWithLowCommunalities <- itemCommunalities[itemCommunalities < .70]
itemsWithLowCommunalities
length(itemsWithLowCommunalities)
mean(itemCommunalities)
# Very Simple Structure criterion for factor number
vss(data_EFA)
pc2 <- principal(data_EFA, nfactors = 4, rotate = "none")
print(pc2, cut = 0.2)
# obtain the residuals
# NOTE(review): `residuals` shadows stats::residuals() for the rest of
# this script -- harmless here, but consider a different name.
residuals <- factor.residuals(r_matrix, pc2$loadings)
residuals %>%
as.data.frame() %>%
head() %>%
select(1)
# create an object that contains the factor residuals
residuals <- as.matrix(residuals[upper.tri(residuals)])
# how many large residuals there are
large.resid <- abs(residuals) > 0.05
sum(large.resid)
sum(large.resid)/nrow(residuals)
# Compute root-mean-square residual
sqrt(mean(residuals^2))
# plot a quick histogram of the residuals
hist(residuals)
# Maximum Likelihood Factor Analysis
# entering raw data and extracting 4 factors,
# with oblique (oblimin) rotation
fit <- fa(data_EFA,
nfactors = 4,
rotate = "oblimin",
fm = "ml")
print(fit, digits=2, sort=TRUE, cut = 0.3)
fa.diagram(fit)
# Same analysis with 2 factors (growth vs deficit?)
fit <- fa(data_EFA,
nfactors = 2,
rotate = "oblimin",
fm = "ml")
print(fit, digits=2, sort=TRUE, cut = 0.3)
fa.diagram(fit)
# And with 6 factors (3 needs x 2 orientations)
fit <- fa(data_EFA,
nfactors = 6,
rotate = "oblimin",
fm = "ml")
print(fit, digits=2, sort=TRUE, cut = 0.3)
fa.diagram(fit)
# Hierarchical EFA: 6 first-order factors under 2 second-order factors
multi.results <- fa.multi(
data_EFA,
nfactors = 6,
nfact2 = 2,
fm = "ml",
rotate = "oblimin"
)
colnames(multi.results$f2$loadings) <- c(
"Competence-\nAutonomy", "Relatedness")
fa.multi.diagram(
multi.results,
flabels = c("Competence-\nGrowth", "Autonomy-\nDeficit",
"Competence-\nDeficit", "Relatedness-\nGrowth",
"Relatedness-\nDeficit", "Autonomy-\nGrowth"),
e.size = .09,
main = "Hierarchical (multilevel) Exploratory Structure of the Needs Orientation Scale",
color.lines = FALSE,
cut = 0.4
)
# Save the same diagram to PDF for the manuscript
pdf(file = "growth_EFA.pdf",
width = 15,
height = 15,
colormodel="rgb")
fa.multi.diagram(
multi.results,
flabels = c("Competence-\nGrowth", "Autonomy-\nDeficit",
"Competence-\nDeficit", "Relatedness-\nGrowth",
"Relatedness-\nDeficit", "Autonomy-\nGrowth"),
e.size = .09,
main = "Hierarchical (multilevel) Exploratory Structure of the Needs Orientation Scale",
color.lines = FALSE,
cut = 0.4
)
dev.off()
# Reduced item pool: keep the 5 best-loading items per subscale
# (6 subscales x 5 items = 30 items)
data_EFA_5 <- data_EFA %>%
select(comp_growth5, comp_growth3, comp_growth2,
comp_growth6, comp_growth7,
aut_deficit5, aut_deficit9, aut_deficit2,
aut_deficit3, aut_deficit6,
comp_deficit1, comp_deficit7, comp_deficit2,
comp_deficit8, comp_deficit9,
rel_growth2, rel_growth5, rel_growth1,
rel_growth3, rel_growth7,
rel_deficit3, rel_deficit5, rel_deficit2,
rel_deficit4, rel_deficit7,
aut_growth1, aut_growth4, aut_growth5,
aut_growth8, aut_growth2)
# Re-run the hierarchical EFA on the reduced pool
multi.results <- fa.multi(
data_EFA_5,
nfactors = 6,
nfact2 = 2,
fm = "ml",
rotate = "oblimin"
)
colnames(multi.results$f2$loadings) <- c(
"Competence-\nAutonomy", "Relatedness")
needs_dims <- c(
"Competence-Growth", "Relatedness-Growth",
"Competence-Deficit", "Autonomy-Deficit",
"Autonomy-Growth", "Relatedness-Deficit")
rownames(multi.results$f2$loadings) <- needs_dims
fa.multi.diagram(
multi.results,
flabels = gsub("-", "-\n", needs_dims),
e.size = .09,
main = "Hierarchical (multilevel) Exploratory Structure of the Needs Orientation Scale",
color.lines = FALSE,
cut = 0.4,
gcut = 0.1
)
# Save the reduced-pool diagram to PDF
pdf(file = "growth_EFA_5.pdf",
width = 12,
height = 9,
colormodel="rgb")
fa.multi.diagram(
multi.results,
flabels = gsub("-", "-\n", needs_dims),
e.size = .09,
main = "Hierarchical (multilevel) Exploratory Structure of the Needs Orientation Scale",
color.lines = FALSE,
cut = 0.4,
gcut = 0.1
)
dev.off()
# Factorability checks repeated on the reduced 30-item pool
# OK (no r > .9, no item with no r > .3)
data_EFA_5 %>%
cormatrix_excel("items_matrix_5-item", print.mat = FALSE)
# OK (p < .05)
cortest.bartlett(data_EFA_5)
# OK (none < .5)
x <- KMO(data_EFA_5)
x$MSA %>%
round(2)
x$MSAi %>%
sort(decreasing = TRUE) %>%
round(2)
# Better than before but still not > 0.00001
r_matrix <- cor(data_EFA_5, use = "pairwise.complete.obs")
det(r_matrix)
# Fit for 6-factor solution is OK (> .95)
multi.results$f1$fit
# Fit for higher-order 2-factor solution is not so good (< .95)
multi.results$f2$fit
# Print the retained items' wording, per subscale:
# Relatedness Growth
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(rel_growth1, rel_growth5, rel_growth2,
rel_growth3, rel_growth7)
# Relatedness Deficit-Reduction
# NOTE(review): only 4 items listed here (rel_deficit2 is missing),
# although the subscale retains 5 items elsewhere -- confirm.
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(rel_deficit5, rel_deficit3, rel_deficit4,
rel_deficit7)
# Competence Growth
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(comp_growth3, comp_growth2, comp_growth5,
comp_growth6, comp_growth7)
# Competence Deficit-Reduction
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(comp_deficit1, comp_deficit7, comp_deficit2,
comp_deficit8, comp_deficit9)
# Autonomy Growth
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(aut_growth4, aut_growth5, aut_growth1,
aut_growth2, aut_growth8)
# Autonomy Deficit-Reduction
dataset.labels %>%
rename_with(~gsub("needs_", "", .)) %>%
select(aut_deficit5, aut_deficit9, aut_deficit2,
aut_deficit3, aut_deficit6)
# CFA on the held-out half of the sample (the rows NOT used for EFA)
data_CFA <- data %>%
rename_with(~gsub("needs_", "", .)) %>%
select(rel_deficit1:comp_growth9) %>%
slice(-row_indices)
# Measurement model: 6 first-order factors (retained items per
# subscale), plus 2 second-order factors (growth, deficit).
needs <- c("comp", "rel", "aut")
latent <- list(comp_growth = paste0("comp_growth", c(2:3, 5:7)),
rel_growth = paste0("rel_growth", c(1:3, 5, 7)),
aut_growth = paste0("aut_growth", c(1:2, 4:5, 8)),
comp_deficit = paste0("comp_deficit", c(1:2, 7:9)),
rel_deficit = paste0("rel_deficit", c(2:5, 7)),
aut_deficit = paste0("aut_deficit", c(2:3, 5:6, 9)),
growth = paste0(needs, "_growth"),
deficit = paste0(needs, "_deficit"))
# Only the growth ~~ deficit covariance is freed; the commented lines
# are alternatives explored during model development.
covariance <- list(growth = "deficit"#,
#aut_growth = "aut_deficit",
#comp_growth = "comp_deficit",
#rel_growth = "rel_deficit"#,
# aut_deficit = "rel_deficit",
# comp_deficit = "rel_deficit",
# aut_deficit = "comp_deficit"
)
cfa.model <- write_lavaan(latent = latent, covariance = covariance)
cat(cfa.model)
# Fit model
fit.cfa <- cfa_fit_plot(cfa.model, data_CFA)
nice_lavaanPlot(fit.cfa)
cfa_fit_plot(cfa.model, data_CFA, save.as.pdf = TRUE, print = FALSE,
file.name = "cfa_model")
nice_fit(fit.cfa, nice_table = TRUE)
# Top-10 modification indices, to look for candidate respecifications
x <- modindices(fit.cfa, sort = TRUE, maximum.number = 10)
# Align label names with the shortened item names used in the CFA
dataset.labels <- dataset.labels %>%
rename_with(~gsub(pattern = "needs_", replacement = "", .x),
starts_with("needs"))
# Attach human-readable item labels to a modification-indices table.
#
# For each row of `x` (a lavaan modindices data frame), looks up the
# lhs/rhs variable names in `labels` (a one-row data frame of item
# labels, e.g. `dataset.labels`) and adds `lhs.labels`/`rhs.labels`
# columns, then flags pairs whose label texts are more than 50%
# similar (via stringdist::stringsim), since similarly-worded items
# can justify correlated residuals.
#
# NOTE(review): the default `labels = data_labels` refers to an object
# not defined in this script (callers pass `dataset.labels`
# explicitly) -- confirm or update the default.
# NOTE(review): `method` is currently unused; stringsim() runs with
# its own default method, not "lcs".
add_item_labels <- function(x, labels = data_labels, method = "lcs") {
  for (i in seq_len(nrow(x))) { # seq_len() is safe when x has 0 rows
    x[i, "lhs.labels"] <- ifelse(
      x$lhs[i] %in% names(labels),
      labels[which(x$lhs[i] == names(labels))],
      NA)
    x[i, "rhs.labels"] <- ifelse(
      x$rhs[i] %in% names(labels),
      labels[which(x$rhs[i] == names(labels))],
      NA)
  }
  # Keep lhs/op/rhs/mi plus the two label columns just added
  x <- x[c(1:4, 9:10)]
  x$similarity <- stringdist::stringsim(x$lhs.labels, x$rhs.labels)
  x$similar <- x$similarity > .50
  # Pairs with a missing label cannot be judged similar; treat NA as FALSE
  x$similar[is.na(x$similar)] <- FALSE
  rownames(x) <- NULL
  x
}
# Inspect labelled modification indices for model 1
add_item_labels(x, labels = dataset.labels)
dataset.labels$comp_deficit6
# Respecification: swap comp_deficit7 for comp_deficit6, and free two
# within-need growth/deficit covariances suggested by the indices.
latent$comp_deficit[3] <- "comp_deficit6"
covariance <- c(covariance, aut_growth = "aut_deficit", rel_growth = "rel_deficit")
cfa.model <- write_lavaan(latent = latent, covariance = covariance)
cat(cfa.model)
# Fit model
fit.cfa <- cfa_fit_plot(cfa.model, data_CFA)
nice_lavaanPlot(fit.cfa)
cfa_fit_plot(cfa.model, data_CFA, save.as.pdf = TRUE, print = FALSE,
file.name = "cfa_model2")
nice_fit(fit.cfa, nice_table = TRUE)
# Re-check modification indices for the respecified model
x <- modindices(fit.cfa, sort = TRUE, maximum.number = 10)
add_item_labels(x, labels = dataset.labels)
# Get alpha & omega for growth
data %>%
select(starts_with("needs_") & contains("growth")) %>%
omega(nfactors = 3)
# Get alpha & omega for deficit-reduction
data %>%
select(starts_with("needs_") & contains("deficit")) %>%
omega(nfactors = 3)
# Get mean of aut.growth
# Only items identified through CFA
data <- data %>%
mutate(aut_growth = rowMeans(
pick(paste0("needs_aut_growth", c(1:2, 4:5, 8))),
na.rm = TRUE))
# Think about using psych::scoreItems...
# data_test <- data %>%
# mutate(aut_growth = scoreItems(
# items = pick(paste0("needs_aut_growth", c(1:2, 4:5, 8)))))
# Get mean of aut.deficit
data <- data %>%
mutate(aut_deficit = rowMeans(
pick(paste0("needs_aut_deficit", c(2:3, 5:6, 9))),
na.rm = TRUE))
# Get mean of comp.growth
# Only items identified through CFA
data <- data %>%
mutate(comp_growth = rowMeans(
pick(paste0("needs_comp_growth", c(2:3, 5:6, 7))),
na.rm = TRUE))
# Get mean of comp.deficit
# NOTE(review): the final CFA replaced comp_deficit7 with
# comp_deficit6, but this composite still uses item 7 -- confirm which
# item set the scale score should use.
data <- data %>%
mutate(comp_deficit = rowMeans(
pick(paste0("needs_comp_deficit", c(1:2, 7:9))),
na.rm = TRUE))
# Get mean of rel.growth
# Only items identified through CFA
data <- data %>%
mutate(rel_growth = rowMeans(
pick(paste0("needs_rel_growth", c(1:3, 5, 7))),
na.rm = TRUE))
# Get mean of rel.deficit
data <- data %>%
mutate(rel_deficit = rowMeans(
pick(paste0("needs_rel_deficit", c(2:5, 7))),
na.rm = TRUE))
# Passion scale (P_): reliability, then subscale means
# Get alpha & omega
data %>%
select(starts_with("P_")) %>%
omega(nfactors = 2)
# Obsessive Passion
# Get mean
data <- data %>%
mutate(OP = rowMeans(
pick(paste0("P_O", 1:6)),
na.rm = TRUE))
# Harmonious Passion
# Get mean
data <- data %>%
mutate(HP = rowMeans(
pick(paste0("P_H", 1:6)),
na.rm = TRUE))
# Well-being (WB_) items: reliability, then scale mean
# Get alpha & omega
data %>%
select(WB_1:WB_4) %>%
omega(nfactors = 1)
data <- data %>%
mutate(ofis.wellbeing = rowMeans(
pick(WB_1:WB_4),
na.rm = TRUE))
# Global Motivation Scale (GMS): reliability and subscale means
data %>%
select(contains("GMS_")) %>%
names
# Get alpha & omega
data %>%
select(contains("GMS_")) %>%
omega(nfactors = 6)
# NOTE(review): introjected items appear to be coded "GMS_TRO"
# (not "GMS_INT...") so that starts_with("GMS_INT") can pick up the
# integrated items without clashing -- confirm against the item names
# printed above.
data <- data %>%
mutate(GMS_identified = rowMeans(
pick(starts_with("GMS_ID")),
na.rm = TRUE),
GMS_intrinsic = rowMeans(
pick(starts_with("GMS_INX")),
na.rm = TRUE),
GMS_external = rowMeans(
pick(starts_with("GMS_EXT")),
na.rm = TRUE),
GMS_introjected = rowMeans(
pick(starts_with("GMS_TRO")),
na.rm = TRUE),
GMS_integrated = rowMeans(
pick(starts_with("GMS_INT")),
na.rm = TRUE),
GMS_amotivation = rowMeans(
pick(starts_with("GMS_AMO")),
na.rm = TRUE))
# GNSFS: need satisfaction (_S*) and frustration (_F*) items, crossed
# with autonomy (A), relatedness (R), and competence (C)
data %>%
select(starts_with("N") & contains(c( "_S", "_F"))) %>%
names
data %>%
select(starts_with("N") & contains("_F")) %>%
names
# Get alpha & omega
data %>%
select(starts_with("N") & contains(c( "_S", "_F"))) %>%
omega(nfactors = 6)
# NOTE(review): tidyselect helpers are case-insensitive by default, so
# starts_with("N") also matches lowercase "n..." columns (e.g., the
# renamed needs_ items); verify the printed names above include only
# the intended GNSFS items.
data <- data %>%
mutate(GNSFS_satisfaction = rowMeans(
pick(starts_with("N") & contains("_S")),
na.rm = TRUE),
GNSFS_frustration = rowMeans(
pick(starts_with("N") & contains("_F")),
na.rm = TRUE),
GNSFS_satisfaction_aut = rowMeans(
pick(starts_with("N") & contains("_SA")),
na.rm = TRUE),
GNSFS_satisfaction_rel = rowMeans(
pick(starts_with("N") & contains("_SR")),
na.rm = TRUE),
GNSFS_satisfaction_comp = rowMeans(
pick(starts_with("N") & contains("_SC")),
na.rm = TRUE),
GNSFS_frustration_aut = rowMeans(
pick(starts_with("N") & contains("_FA")),
na.rm = TRUE),
GNSFS_frustration_rel = rowMeans(
pick(starts_with("N") & contains("_FR")),
na.rm = TRUE),
GNSFS_frustration_comp = rowMeans(
pick(starts_with("N") & contains("_FC")),
na.rm = TRUE))
# Mindset (MS_AF) items: reliability, then reversed scale mean
# Get alpha & omega
data %>%
select(MS_AF1:MS_AF3) %>%
omega(nfactors = 1)
# Mean is reversed on the 6-point scale so higher = growth mindset
data <- data %>%
mutate(mindset = rowMeans(
pick(MS_AF1:MS_AF3),
na.rm = TRUE),
mindset = nice_reverse(mindset, 6))
# Big Five (BF_): reverse-code items flagged with the "r" suffix,
# then score the five trait means
data %>%
select(starts_with("BF_") & ends_with("r")) %>%
names
data %>%
select(starts_with("BF_N")) %>%
names
# Reverse-coded items are on a 5-point scale
data <- data %>%
mutate(across(contains("BF_") & ends_with("r"),
~nice_reverse(.x, 5), .names = "{col}"))
# Get alpha & omega
data %>%
select(starts_with("BF_")) %>%
omega(nfactors = 5)
data <- data %>%
mutate(BF_openness = rowMeans(
pick(starts_with("BF_O")),
na.rm = TRUE),
BF_conscientiousness = rowMeans(
pick(starts_with("BF_C")),
na.rm = TRUE),
BF_extroversion = rowMeans(
pick(starts_with("BF_E")),
na.rm = TRUE),
BF_agreableness = rowMeans(
pick(starts_with("BF_A")),
na.rm = TRUE),
BF_neuroticism = rowMeans(
pick(starts_with("BF_N")),
na.rm = TRUE))
# Inspect the temperament (AATQ) item names
names(select(data, starts_with("Tem_")))
names(select(data, starts_with("Tem_App")))
# Get alpha & omega
omega(select(data, starts_with("Tem_")), nfactors = 2)
# Score the avoidance and approach temperament subscales
data$AATQ_avoidance <- rowMeans(
  select(data, starts_with("Tem_Avo")), na.rm = TRUE)
data$AATQ_approach <- rowMeans(
  select(data, starts_with("Tem_App")), na.rm = TRUE)
# Inspect the MPO item names
names(select(data, starts_with("MPO_")))
names(select(data, starts_with("MPO_PAp")))
# Get alpha & omega
omega(select(data, starts_with("MPO_")), nfactors = 2)
# Score the four goal-orientation subscales
data$MPOS_mastery_approach <- rowMeans(
  select(data, starts_with("MPO_MAp")), na.rm = TRUE)
data$MPOS_mastery_avoidance <- rowMeans(
  select(data, starts_with("MPO_MAv")), na.rm = TRUE)
data$MPOS_performance_approach <- rowMeans(
  select(data, starts_with("MPO_PAp")), na.rm = TRUE)
data$MPOS_performance_avoidance <- rowMeans(
  select(data, starts_with("MPO_PAv")), na.rm = TRUE)
# Inspect the Pers (RFPS) item names
names(select(data, starts_with("Pers_")))
names(select(data, starts_with("Pers_R")))
# Get alpha & omega
omega(select(data, starts_with("Pers_")), nfactors = 2)
# Score the flexible and rigid persistence subscales
data$RFPS_flexible <- rowMeans(
  select(data, starts_with("Pers_F")), na.rm = TRUE)
data$RFPS_rigid <- rowMeans(
  select(data, starts_with("Pers_R")), na.rm = TRUE)
# Inspect the PGD item names
names(select(data, starts_with("PGD_")))
names(select(data, starts_with("PGD_Pi")))
# Get alpha & omega
omega(select(data, starts_with("PGD_")), nfactors = 5)
# Score the five PGDS subscales
data$PGDS_autonomy <- rowMeans(
  select(data, starts_with("PGD_A")), na.rm = TRUE)
data$PGDS_mastery <- rowMeans(
  select(data, starts_with("PGD_E")), na.rm = TRUE)
data$PGDS_positive <- rowMeans(
  select(data, starts_with("PGD_PR")), na.rm = TRUE)
data$PGDS_acceptance <- rowMeans(
  select(data, starts_with("PGD_S")), na.rm = TRUE)
data$PGDS_purpose <- rowMeans(
  select(data, starts_with("PGD_Pi")), na.rm = TRUE)
# Inspect the Self-Acceptance (SAc) item names
data %>%
  select(starts_with("SAc_")) %>%
  names
data %>%
  select(starts_with("SAc_D")) %>%
  names
# Reverse-code the reverse-keyed items (4-point scale)
data <- data %>%
  mutate(across(starts_with("SAc_") & ends_with("r"),
                ~nice_reverse(.x, 4), .names = "{col}"))
# Get alpha & omega
data %>%
  select(starts_with("SAc_")) %>%
  omega(nfactors = 5)
# Score the SAS total and its subscales.
# FIX: the autonomy subscale now selects "SAc_Au" (the SAc_AuDi items).
# The previous prefix "SAc_A" also matched the acceptance items
# (SAc_ACC6r, SAc_ACC8r, SAc_ACC14r), contaminating SAS_autonomy.
data <- data %>%
  mutate(SAS = rowMeans(
    pick(starts_with("SAc_")),
    na.rm = TRUE),
    SAS_emotions = rowMeans(
      pick(starts_with("SAc_E")),
      na.rm = TRUE),
    SAS_autonomy = rowMeans(
      pick(starts_with("SAc_Au")),
      na.rm = TRUE),
    SAS_trust = rowMeans(
      pick(starts_with("SAc_T")),
      na.rm = TRUE),
    SAS_acceptance = rowMeans(
      pick(starts_with("SAc_AC")),
      na.rm = TRUE),
    SAS_desirability = rowMeans(
      pick(starts_with("SAc_D")),
      na.rm = TRUE))
# Inspect the growth-in-life (GiL) item names
data %>%
  select(starts_with("GiL_")) %>%
  names
# Get alpha & omega
data %>%
  select(starts_with("GiL_")) %>%
  omega(nfactors = 1)
# Score growth in life as the mean of the GiL_ items.
# FIX: use the "GiL_" prefix, consistent with the inspection and
# reliability steps above; the bare "GiL" prefix would also match any
# other column that merely starts with those letters.
data <- data %>%
  mutate(growth_life = rowMeans(
    pick(starts_with("GiL_")),
    na.rm = TRUE))
# Inspect the GMI item names
names(select(data, starts_with("GMI_")))
names(select(data, starts_with("GMI_Exp")))
# Get alpha & omega
omega(select(data, starts_with("GMI_")), nfactors = 2)
# Score the GMI total FIRST — before the subscale columns exist — so
# that the "GMI_" prefix matches item columns only, then the two
# subscales
data$GMI <- rowMeans(
  select(data, starts_with("GMI_")), na.rm = TRUE)
data$GMI_reflective <- rowMeans(
  select(data, starts_with("GMI_Ref")), na.rm = TRUE)
data$GMI_experiential <- rowMeans(
  select(data, starts_with("GMI_Exp")), na.rm = TRUE)
# Inspect the Gr (Oldham growth) item names
names(select(data, starts_with("Gr_")))
# Get alpha & omega
omega(select(data, starts_with("Gr_")), nfactors = 1)
# Score the Oldham growth scale as the item mean
data$growth_oldham <- rowMeans(
  select(data, starts_with("Gr_")), na.rm = TRUE)
# Inspect the PEQ item names
names(select(data, starts_with("PEQ_")))
names(select(data, starts_with("PEQ_N")))
# Reverse-code the reverse-keyed items (7-point scale)
data <- data %>%
  mutate(across(contains("PEQ_") & ends_with("r"),
                ~nice_reverse(.x, 7), .names = "{col}"))
# Get alpha & omega (excluding the ATT columns)
omega(select(data, starts_with("PEQ_"), -contains("ATT")), nfactors = 2)
# Score the PEQ total (ATT columns excluded) before the subscale
# columns exist, then the augmentation and novelty subscales
data$PEQ <- rowMeans(
  select(data, starts_with("PEQ_"), -contains("ATT")), na.rm = TRUE)
data$PEQ_augmentation <- rowMeans(
  select(data, starts_with("PEQ_Au")), na.rm = TRUE)
data$PEQ_novelty <- rowMeans(
  select(data, starts_with("PEQ_N")), na.rm = TRUE)
# Inspect the PGI item names
names(select(data, starts_with("PGI_")))
# Get alpha & omega
omega(select(data, starts_with("PGI_")), nfactors = 1)
# Score the PGI total as the item mean
data$PGI <- rowMeans(
  select(data, starts_with("PGI_")), na.rm = TRUE)
# Make list of DVs (all composite scores from aut_growth through PGI)
col.list <- names(select(data, aut_growth:PGI))
# Univariate normality check (Shapiro + histogram) for each DV;
# the returned list of plots autoprints in the report
map(col.list, ~nice_normality(data,
                              variable = .x,
                              title = .x,
                              shapiro = TRUE,
                              histogram = TRUE))
predict_bestNormalize <- function(var) {
  # Find the best normalizing transformation for `var` (via the
  # bestNormalize package) and return the transformed values, tagged
  # with the name of the chosen transformation for later reporting.
  # NOTE: calls dplyr::cur_column(), so it must be invoked from inside
  # mutate(across(...)).
  x <- bestNormalize(var, standardize = FALSE, allow_orderNorm = FALSE)
  # Log which transformation was selected for this column
  print(cur_column())
  print(x$chosen_transform)
  cat("\n")
  y <- predict(x)
  # FIX: store only the transformation's class name as the label.
  # The previous code concatenated attributes(y) into the attribute,
  # which yields a list (not a single label) whenever y carries
  # attributes, breaking the setNames() labelling downstream.
  attr(y, "transform") <- class(x$chosen_transform)[1]
  y
}
# Normalize every DV with its best transformation; seed fixed for
# reproducibility of bestNormalize's internal cross-validation
set.seed(42)
data <- mutate(data,
               across(all_of(col.list),
                      predict_bestNormalize,
                      .names = "{.col}.t"))
# Point col.list at the transformed (.t) columns
col.list <- paste0(col.list, ".t")
# Group normality of the transformed DVs, labelled by the chosen
# transformation.
# FIX: read the "transform" attribute from the transformed columns only
# (data[col.list]) instead of scanning every column of `data`; the old
# version relied on unlist() dropping NULLs to line names up with
# col.list, which silently misaligns if any other column ever carries
# that attribute.
transform.labels <- unlist(
  lapply(data[col.list], function(x) attr(x, "transform", exact = TRUE)))
named.col.list <- setNames(col.list, transform.labels)
lapply(named.col.list, function(x)
  nice_normality(data,
                 variable = x,
                 title = x,
                 shapiro = TRUE,
                 histogram = TRUE))
# Univariate outlier plots for each transformed DV, arranged two per row
outlier.plots <- lapply(col.list, function(v) {
  plot_outliers(data, response = v, ytitle = v, binwidth = 0.3)
})
plots(outlier.plots, n_columns = 2)
# Flag univariate outliers with the MAD criterion (|z| > 3)
find_mad(as.data.frame(data), col.list, criteria = 3)
# Winsorize variables of interest with MAD
data <- data %>%
  mutate(across(all_of(col.list),
                winsorize_mad,
                .names = "{.col}.w")) %>%
  ungroup()
# Update col.list to the winsorized (.w) columns
col.list <- paste0(col.list, ".w")
data.vars <- data[col.list]
# Strip the ".t.w" suffix to recover the original variable names.
# FIX: match the suffix literally (fixed = TRUE); previously the dots
# were regex wildcards and could match unintended character pairs.
col.list <- gsub(".t.w", "", col.list, fixed = TRUE)
names(data.vars) <- col.list
# Multivariate outliers via Mahalanobis distance.
# Error in solve.default(cov, ...) :
# system is computationally singular: reciprocal condition number
# We have to remove the global averages (GMI, SAS) to make it work, and
# exclude two variables that are too large following the
# transformations, the GNSFS_comp variables...
outliers <- data.vars %>%
  select(-c(GMI, SAS,
            GNSFS_satisfaction_comp,
            GNSFS_frustration_comp)) %>%
  na.omit() %>%
  check_outliers(method = "mahalanobis")
outliers
plot(outliers)
# Proportion of multivariate outliers in the full sample.
# FIX: derive the count from `outliers` instead of the hard-coded 73,
# which only matched the current data.
length(which(outliers)) / nrow(data)
# NOTE(review): which(outliers) indexes the na.omit()'d subset; this
# assumes data.vars has no incomplete rows — confirm, otherwise these
# row indices will not line up with data.vars.
data.vars <- data.vars[-which(outliers), ]
report::report_participants(data.vars)
# Standardize the retained variables for the main analyses
data.vars <- data.vars %>%
  mutate(across(all_of(col.list), standardize))